com.jfinal.template.io.Utf8Encoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of enjoy Show documentation
Show all versions of enjoy Show documentation
Enjoy is a simple, light, rapid, independent, extensible Java Template Engine.
/**
* Copyright (c) 2011-2019, James Zhan 詹波 ([email protected]).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.jfinal.template.io;
import java.nio.charset.MalformedInputException;
/**
* Utf8Encoder
*
* http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/8u40-b25/sun/nio/cs/UTF_8.java?av=f
* http://grepcode.com/search?query=ArrayEncoder&start=0&entity=type&n=
*/
public class Utf8Encoder extends Encoder {
public static final Utf8Encoder me = new Utf8Encoder();
public float maxBytesPerChar() {
return 3.0F;
}
public int encode(char[] chars, int offset, int len, byte[] bytes) {
int sl = offset + len;
int dp = 0;
int dlASCII = dp + Math.min(len, bytes.length);
// ASCII only optimized loop
while (dp < dlASCII && chars[offset] < '\u0080') {
bytes[dp++] = (byte) chars[offset++];
}
while (offset < sl) {
char c = chars[offset++];
if (c < 0x80) {
// Have at most seven bits
bytes[dp++] = (byte) c;
} else if (c < 0x800) {
// 2 bytes, 11 bits
bytes[dp++] = (byte) (0xc0 | (c >> 6));
bytes[dp++] = (byte) (0x80 | (c & 0x3f));
} else if (c >= '\uD800' && c < ('\uDFFF' + 1)) { //Character.isSurrogate(c) but 1.7
final int uc;
int ip = offset - 1;
if (Character.isHighSurrogate(c)) {
if (sl - ip < 2) {
uc = -1;
} else {
char d = chars[ip + 1];
if (Character.isLowSurrogate(d)) {
uc = Character.toCodePoint(c, d);
} else {
throw new RuntimeException("encode UTF8 error", new MalformedInputException(1));
}
}
} else {
if (Character.isLowSurrogate(c)) {
throw new RuntimeException("encode UTF8 error", new MalformedInputException(1));
} else {
uc = c;
}
}
if (uc < 0) {
bytes[dp++] = (byte) '?';
} else {
bytes[dp++] = (byte) (0xf0 | ((uc >> 18)));
bytes[dp++] = (byte) (0x80 | ((uc >> 12) & 0x3f));
bytes[dp++] = (byte) (0x80 | ((uc >> 6) & 0x3f));
bytes[dp++] = (byte) (0x80 | (uc & 0x3f));
offset++; // 2 chars
}
} else {
// 3 bytes, 16 bits
bytes[dp++] = (byte) (0xe0 | ((c >> 12)));
bytes[dp++] = (byte) (0x80 | ((c >> 6) & 0x3f));
bytes[dp++] = (byte) (0x80 | (c & 0x3f));
}
}
return dp;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy