
// com.upokecenter.text.encoders.EncodingISO2022JP2 Maven / Gradle / Ivy
package com.upokecenter.text.encoders;
import java.io.*;
import com.upokecenter.util.*;
import com.upokecenter.text.*;
public class EncodingISO2022JP2 implements ICharacterEncoding {
/**
 * Byte-to-code-point state machine for this encoding.
 *
 * <p>State numbers used by {@code machineState} and {@code outputState}:
 * 0 = ASCII, 1 = escape start (ESC seen), 2 = escape (one byte after ESC
 * seen), 9 = escape final (ESC $ ( seen), 3 = Roman, 4 = double-byte lead,
 * 5 = double-byte trail, 6 = Katakana, 7 = ISO-8859-1, 8 = ISO-8859-7.
 */
private static class Decoder implements ICharacterDecoder {
  // Buffered input source; also used to push bytes back after a failed escape.
  private final DecoderState state;
  // State the machine is currently in (including transient escape states).
  private int machineState;
  // Last content state that was selected; restored when an escape sequence
  // turns out to be malformed. Never holds a transient escape state.
  private int outputState;
  // In escape states: the byte that followed ESC. In state 5: the lead byte.
  private int lead;
  // Nonzero right after a successful escape sequence and until a content
  // byte is processed; a second escape in that window is reported as -2.
  private int output;
  // Selects the double-byte table in state 5:
  // 0 = JIS0208, 1 = GB2312, 2 = JIS0212, 3 = KSC5601.
  private int leadTrailSet;
  // Code points returned in state 8, indexed directly by the input byte
  // (0x20..0x7f). Entries equal to -2 are returned as-is, i.e. as errors.
  private static final int[] Iso88597 = { 128, 129, 130,
    131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
    145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158,
    159, 160, 8216, 8217, 163, 8364, 8367, 166, 167, 168, 169, 890, 171,
    172, 173, -2, 8213, 176, 177, 178, 179, 900, 901, 902, 183, 904, 905,
    906, 187, 908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919,
    920, 921, 922, 923, 924, 925, 926, 927, 928, 929, -2, 931, 932, 933,
    934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947,
    948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961,
    962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, -2 };

  public Decoder() {
    this.state = new DecoderState(2);
  }

  /**
   * Reads bytes from the stream until one code point, an error, or the end
   * of input can be reported.
   *
   * @param stream source of input bytes, read through the internal
   *     DecoderState.
   * @return the decoded code point; -1 when a negative byte value (end of
   *     input, presumably) is read in a content state; -2 for malformed or
   *     unmappable input.
   */
  public int ReadChar(IByteReader stream) {
    while (true) {
      int b = this.state.ReadInputByte(stream);
      switch (this.machineState) {
        case 0:
          // ASCII
          if (b == 0x1b) {
            this.machineState = 1;
          } else if (b < 0) {
            return -1;
          } else if (b <= 0x7f && b != 0x0e && b != 0x0f) {
            this.output = 0;
            return b;
          } else {
            this.output = 0;
            return -2;
          }
          break;
        case 1:
          // Escape start
          if (b == 0x24 || b == 0x28 || b == '.') {
            this.lead = b;
            this.machineState = 2;
          } else {
            // Not an escape sequence: report the ESC as an error and
            // reprocess this byte in the previously selected content state.
            this.state.PrependOne(b);
            this.output = 0;
            this.machineState = this.outputState;
            return -2;
          }
          break;
        case 2: {
          // Escape
          if (this.lead == 0x24 && b == '(') {
            // ESC $ ( needs one more byte. Only the transient machine state
            // changes here: lead stays 0x24 and outputState keeps the last
            // content state, so a bad final byte can be unwound correctly.
            this.machineState = 9;
            break;
          }
          int tmpState = -1;
          if (this.lead == 0x24 && (b == 0x40 || b == 0x42)) {
            tmpState = 4; // JIS0208
            this.leadTrailSet = 0;
          } else if (this.lead == 0x24 && b == 0x41) {
            tmpState = 4; // GB2312
            this.leadTrailSet = 1;
          } else if (this.lead == 0x28 && b == 0x42) {
            tmpState = 0; // Ascii
          } else if (this.lead == '.' && b == 'A') {
            tmpState = 7; // ISO-8859-1
          } else if (this.lead == '.' && b == 'F') {
            tmpState = 8; // ISO-8859-7
          } else if (this.lead == 0x28 && b == 0x4a) {
            tmpState = 3; // Roman
          } else if (this.lead == 0x28 && b == 0x49) {
            tmpState = 6; // Katakana
          } else {
            // Unknown sequence: push back both bytes after ESC and resume
            // in the previously selected content state.
            this.state.PrependTwo(this.lead, b);
            this.lead = 0;
            this.output = 0;
            this.machineState = this.outputState;
            return -2;
          }
          this.lead = 0;
          this.machineState = this.outputState = tmpState;
          if (this.output != 0) {
            // Two escape sequences in a row with nothing decoded in between.
            return -2;
          }
          this.output = 1;
          break;
        }
        case 9: {
          // Escape final: ESC $ ( has been read, b is the final byte.
          if (b == 'C') {
            this.leadTrailSet = 3; // KSC5601
          } else if (b == 'D') {
            this.leadTrailSet = 2; // JIS0212
          } else {
            // Push back everything after ESC. The literal 0x24 is used
            // rather than a field so the pushed-back bytes are exactly
            // the ones that were consumed.
            this.state.PrependThree(0x24, 0x28, b);
            this.lead = 0;
            this.output = 0;
            this.machineState = this.outputState;
            return -2;
          }
          this.lead = 0;
          this.machineState = this.outputState = 4;
          if (this.output != 0) {
            return -2;
          }
          this.output = 1;
          break;
        }
        case 4:
          // Lead
          if (b == 0x1b) {
            this.machineState = 1;
          } else if (b >= 0x21 && b <= 0x7e) {
            this.output = 0;
            this.lead = b;
            this.machineState = 5;
          } else if (b < 0) {
            return -1;
          } else {
            this.output = 0;
            return -2;
          }
          break;
        case 5: // Trail
          if (b < 0) {
            // Input ended between lead and trail byte.
            this.machineState = 4;
            this.state.PrependOne(b);
            return -2;
          } else if (b == 0x1b) {
            // Lead byte is dropped; the ESC starts a new escape sequence.
            this.machineState = 1;
            return -2;
          } else if (b >= 0x21 && b <= 0x7e) {
            this.machineState = 4;
            int p = -1;
            int c = -1;
            if (this.leadTrailSet == 0) {
              // JIS 0208
              p = (this.lead - 0x21) * 94 + (b - 0x21);
              c = Jis0208.IndexToCodePoint(p);
            } else if (this.leadTrailSet == 1) {
              // GB2312
              p = (this.lead - 0x21) * 190 + (b - 0x21) + 6176;
              c = Gb18030.IndexToCodePoint(p);
            } else if (this.leadTrailSet == 2) {
              // JIS 0212
              p = (this.lead - 0x21) * 94 + (b - 0x21);
              c = Jis0212.IndexToCodePoint(p);
            } else if (this.leadTrailSet == 3) {
              // KSC 5601
              p = (this.lead - 0x21) * 190 + (b - 0x21) + 6176;
              c = Korean.IndexToCodePoint(p);
            }
            return c < 0 ? -2 : c;
          } else {
            this.machineState = 4;
            return -2;
          }
        case 6: // Katakana
          if (b == 0x1b) {
            this.machineState = 1;
          } else if (b >= 0x21 && b <= 0x5f) {
            this.output = 0;
            return 0xff40 + b;
          } else if (b < 0) {
            return -1;
          } else {
            this.output = 0;
            return -2;
          }
          break;
        case 7: // ISO-8859-1
          if (b == 0x1b) {
            this.machineState = 1;
          } else if (b >= 0x20 && b <= 0x7f) {
            this.output = 0;
            return 0x80 + b;
          } else if (b < 0) {
            return -1;
          } else {
            this.output = 0;
            return -2;
          }
          break;
        case 8: // ISO-8859-7
          if (b == 0x1b) {
            this.machineState = 1;
          } else if (b >= 0x20 && b <= 0x7f) {
            this.output = 0;
            return Iso88597[b];
          } else if (b < 0) {
            return -1;
          } else {
            this.output = 0;
            return -2;
          }
          break;
        case 3: // Roman
          if (b == 0x1b) {
            this.machineState = 1;
          } else if (b < 0) {
            // Checked before the range test so a negative value can never
            // be mistaken for a small byte value.
            return -1;
          } else if (b == 0x5c) {
            this.output = 0;
            return 0xa5;
          } else if (b == 0x7e) {
            this.output = 0;
            return 0x203e;
          } else if (b <= 0x7f && b != 0x0e && b != 0x0f) {
            // Same accepted range as the ASCII state, including 0x7f.
            this.output = 0;
            return b;
          } else {
            this.output = 0;
            return -2;
          }
          break;
        default: {
          // Every state value assigned in this class has a case above.
          throw new IllegalStateException("Unexpected state");
        }
      }
    }
  }
}
/**
 * Code-point-to-byte encoder. Only three designations are ever emitted:
 * ESC ( B, ESC ( J and ESC $ B.
 */
private static class Encoder implements ICharacterEncoder {
  // Currently designated set: 0 after ESC ( B (and initially),
  // 3 after ESC ( J, 4 after ESC $ B.
  private int encoderState;

  public Encoder() {
  }

  // Replacement code points for U+FF61..U+FF9F, indexed by (c - 0xff61).
  // Declared final so the shared table reference cannot be reassigned.
  private static final int[] katakana = { 12290, 12300, 12301, 12289, 12539,
    12530, 12449, 12451, 12453, 12455, 12457, 12515, 12517, 12519,
    12483, 12540, 12450, 12452, 12454, 12456, 12458, 12459, 12461,
    12463, 12465, 12467, 12469, 12471, 12473, 12475, 12477, 12479,
    12481, 12484, 12486, 12488, 12490, 12491, 12492, 12493, 12494,
    12495, 12498, 12501, 12504, 12507, 12510, 12511, 12512, 12513,
    12514, 12516, 12518, 12520, 12521, 12522, 12523, 12524, 12525,
    12527, 12531, 12443, 12444 };

  // Writes ESC followed by the two given bytes; returns the byte count (3).
  private static int WriteEscape(IWriter output, int second, int third) {
    output.write((byte)0x1b);
    output.write((byte)second);
    output.write((byte)third);
    return 3;
  }

  /**
   * Encodes one code point, emitting an escape sequence first when the
   * required set is not the currently designated one.
   *
   * @param c code point to encode; a negative value ends the text and
   *     writes ESC ( B if another set is designated.
   * @param output receives the encoded bytes.
   * @return the number of bytes written; -1 if c is negative and nothing
   *     had to be written; -2 if c cannot be encoded. Note that -2 can be
   *     returned after ESC ( B was already written (0x0e, 0x0f or 0x1b
   *     while in state 4).
   */
  public int Encode(int c, IWriter output) {
    int count = 0;
    while (true) {
      if (c < 0) {
        if (this.encoderState == 0) {
          return -1;
        }
        this.encoderState = 0;
        return count + WriteEscape(output, 0x28, 0x42);
      }
      if (c <= 0x7f) {
        boolean singleByteState = this.encoderState == 0 ||
          this.encoderState == 3;
        if (singleByteState && (c == 0x0e || c == 0x0f || c == 0x1b)) {
          // TODO: Find a way to convey errors with
          // a different code point, in this case, U + FFFD
          return -2;
        }
        // In state 3 the bytes 0x5c and 0x7e stand for U+00A5 and U+203E,
        // so those two code points need state 0 instead.
        if (this.encoderState == 0 ||
          (this.encoderState == 3 && c != 0x5c && c != 0x7e)) {
          output.write((byte)c);
          return 1 + count;
        }
        this.encoderState = 0;
        count += WriteEscape(output, 0x28, 0x42);
        continue;
      }
      if (c == 0xa5 || c == 0x203e) {
        if (this.encoderState == 3) {
          output.write((byte)(c == 0xa5 ? 0x5c : 0x7e));
          return 1 + count;
        }
        this.encoderState = 3;
        count += WriteEscape(output, 0x28, 0x4a);
        continue;
      }
      if (c >= 0xff61 && c < 0xffa0) {
        c = katakana[c - 0xff61];
      }
      if (c == 0x2212) {
        c = 0xff0d;
      }
      int cp = Jis0208.CodePointToIndex(c);
      if (cp < 0) {
        return -2;
      }
      if (this.encoderState != 4) {
        this.encoderState = 4;
        count += WriteEscape(output, 0x24, 0x42);
      }
      // Split the index into row and cell, 94 cells per row.
      output.write((byte)((cp / 94) + 0x21));
      output.write((byte)((cp % 94) + 0x21));
      return count + 2;
    }
  }
}
/**
 * Creates a decoder for this encoding.
 *
 * @return a newly constructed decoder; each call returns a separate
 *     instance.
 */
public ICharacterDecoder GetDecoder() {
  ICharacterDecoder decoder = new Decoder();
  return decoder;
}
/**
 * Creates an encoder for this encoding.
 *
 * @return a newly constructed encoder; each call returns a separate
 *     instance.
 */
public ICharacterEncoder GetEncoder() {
  ICharacterEncoder encoder = new Encoder();
  return encoder;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy