All downloads are free. Search and download functionality uses the official Maven repository.

com.upokecenter.text.encoders.EncodingUtf16 Maven / Gradle / Ivy

package com.upokecenter.text.encoders;

import java.io.*;
import com.upokecenter.util.*;
import com.upokecenter.text.*;

  /**
   * UTF-16 character encoding. Provides decoders and encoders for
   * little-endian, big-endian, and "unlabeled" UTF-16 (where the byte order
   * is signalled by, or inferred from, the first two bytes of the stream).
   *
   * <p>Return-value conventions used by the nested classes, as implemented
   * below:</p>
   * <ul>
   * <li>Decoders return the decoded code point, -1 when the input is
   * exhausted, or -2 when the input is malformed (unpaired surrogate or
   * truncated code unit).</li>
   * <li>Encoders return the number of bytes written, -1 when the code point
   * passed in is negative, or -2 when the code point cannot be encoded
   * (greater than 0x10ffff or in the surrogate range).</li>
   * </ul>
   */
  public class EncodingUtf16 implements ICharacterEncoding {
    /**
     * Encoder for unlabeled UTF-16: emits a little-endian byte order mark
     * (FF FE) before the first successfully encoded character, then behaves
     * as a plain little-endian encoder.
     */
    private static class EndianFreeEncoder implements ICharacterEncoder {
      private final ICharacterEncoder encoder;
      // True until the first character (and the BOM) has been written.
      private boolean start;

      public EndianFreeEncoder() {
        this.start = true;
        this.encoder = new Encoder(false);  // little endian
      }

      /**
       * Encodes one code point to the given writer.
       * @param c Code point to encode; a negative value yields -1.
       * @param output Writer that receives the encoded bytes.
       * @return Number of bytes written (including the 2-byte BOM on the
       * first successful call), -1 if {@code c} is negative, or -2 if
       * {@code c} cannot be encoded. Nothing is written to {@code output}
       * when a negative value is returned.
       */
      public int Encode(int c, IWriter output) {
        if (this.start) {
          if (c < 0) {
            return -1;
          }
          // Stage the BOM and the first character in a scratch buffer so
          // that nothing reaches the output if the character is rejected.
          ArrayWriter aw = new ArrayWriter();
          aw.write((byte)0xff);
          aw.write((byte)0xfe);
          int ret = this.encoder.Encode(c, aw);
          if (ret >= 0) {
            byte[] bytes = aw.ToArray();
            // Copy the staged bytes to the caller's writer (the scratch
            // buffer itself is discarded).
            output.write(bytes, 0, bytes.length);
            this.start = false;
            return ret + 2;
          }
          return ret;
        } else {
          return this.encoder.Encode(c, output);
        }
      }
    }

    /**
     * Decoder for unlabeled UTF-16: inspects the first two bytes to choose
     * the byte order, then delegates to a {@link Decoder}.
     */
    private static class EndianFreeDecoder implements ICharacterDecoder {
      // Assigned once the first two bytes have been examined.
      private ICharacterDecoder decoder;
      // True until the first two bytes have been read successfully.
      private boolean start;

      public EndianFreeDecoder() {
        this.start = true;
      }

      /**
       * Reads one code point from the stream.
       * @param stream Byte source to read from.
       * @return The decoded code point, -1 at end of stream, or -2 on
       * malformed input (including a stream consisting of a single byte).
       */
      public int ReadChar(IByteReader stream) {
        if (!this.start) {
          return this.decoder.ReadChar(stream);
        }
        int c = stream.read();
        if (c < 0) {
          return -1;
        }
        int c2 = stream.read();
        if (c2 < 0) {
          return -2;  // Only one byte
        }
        this.start = false;
        if (c == 0xff && c2 == 0xfe) {
          // Little-endian BOM: absorb it and decode the rest as little endian
          this.decoder = new Decoder(false);
          return this.decoder.ReadChar(stream);
        } else {
          // Anything else, including FE FF, is
          // treated as big endian (see RFC 2781 sec. 4.3)
          if (c == 0xfe && c2 == 0xff) {
            // absorb BOM
            this.decoder = new Decoder(true);
            return this.decoder.ReadChar(stream);
          } else if (c >= 0xd8 && c <= 0xdb) {
            // The first code unit is a lead surrogate; hand it to the
            // decoder so it can be paired with the next code unit
            this.decoder = new Decoder(true, (c << 8) | c2);
            return this.decoder.ReadChar(stream);
          } else if (c >= 0xdc && c <= 0xdf) {
            // unpaired (trail) surrogate
            this.decoder = new Decoder(true);
            return -2;
          } else {
            // anything else
            this.decoder = new Decoder(true);
          }
          // The first two bytes form an ordinary big-endian code unit
          return (c << 8) | c2;
        }
      }
    }

    /**
     * UTF-16 decoder with a fixed byte order. Combines surrogate pairs into
     * supplementary code points and reports unpaired surrogates as errors.
     */
    private static class Decoder implements ICharacterDecoder {
      // NOTE(review): DecoderState is declared elsewhere; it is used here to
      // read bytes and to push back two bytes via PrependTwo -- presumably
      // pushed-back bytes are returned by later ReadInputByte calls; confirm.
      private final DecoderState state;
      private final boolean bigEndian;
      // First byte of a partially read code unit, or -1 if none.
      private int lead;
      // Pending lead surrogate code unit, or -1 if none.
      private int surrogate;

      /**
       * Creates a decoder.
       * @param bigEndian True for big-endian byte order, false for
       * little-endian.
       * @param surrogate A lead surrogate code unit already consumed from
       * the input, or -1 if there is none.
       */
      public Decoder(boolean bigEndian, int surrogate) {
        this.bigEndian = bigEndian;
        this.state = new DecoderState(1);
        this.lead = -1;
        this.surrogate = surrogate;
      }

      /**
       * Creates a decoder with no pending surrogate.
       * @param bigEndian True for big-endian byte order, false for
       * little-endian.
       */
      public Decoder(boolean bigEndian) {
        this(bigEndian, -1);
      }

      /**
       * Reads one code point from the stream.
       * @param stream Byte source to read from.
       * @return The decoded code point, -1 at end of stream, or -2 on an
       * unpaired surrogate or a truncated code unit.
       */
      public int ReadChar(IByteReader stream) {
        while (true) {
          int b = this.state.ReadInputByte(stream);
          if (b < 0) {
            // End of input: an incomplete code unit or a dangling lead
            // surrogate is an error; otherwise it is a clean end of stream
            if (this.lead >= 0 || this.surrogate >= 0) {
              this.lead = this.surrogate = -1;
              return -2;
            }
            return -1;
          }
          if (this.lead < 0) {
            // First byte of a code unit; wait for the second
            this.lead = b;
            continue;
          }
          int code = this.bigEndian ? b + (this.lead << 8) : this.lead + (b <<
               8);
          this.lead = -1;
          if (this.surrogate >= 0) {
            if ((code & 0xfc00) == 0xdc00) {
              // Lead surrogate followed by trail surrogate: combine them
              code = 0x10000 + (code - 0xdc00) + ((this.surrogate - 0xd800) <<
                    10);
              this.surrogate = -1;
              return code;
            }
            // Lead surrogate not followed by a trail surrogate: push the
            // code unit's bytes back in stream order and report an error
            // for the unpaired lead surrogate
            int b1 = (code >> 8) & 0xff;
            int b2 = code & 0xff;
            if (this.bigEndian) {
              this.state.PrependTwo(b1, b2);
            } else {
              this.state.PrependTwo(b2, b1);
            }
            this.surrogate = -1;
            return -2;
          }
          if ((code & 0xfc00) == 0xd800) {
            // Lead surrogate: remember it and read the next code unit
            this.surrogate = code;
          } else {
            // A trail surrogate with no preceding lead surrogate is an error
            return (code & 0xfc00) == 0xdc00 ? -2 : code;
          }
        }
      }
    }

    /**
     * UTF-16 encoder with a fixed byte order. Does not write a byte order
     * mark.
     */
    private static class Encoder implements ICharacterEncoder {
      private final boolean bigEndian;

      /**
       * Creates an encoder.
       * @param bigEndian True for big-endian byte order, false for
       * little-endian.
       */
      public Encoder(boolean bigEndian) {
        this.bigEndian = bigEndian;
      }

      /**
       * Encodes one code point to the given writer.
       * @param c Code point to encode.
       * @param output Writer that receives the encoded bytes.
       * @return 2 or 4 (the number of bytes written), -1 if {@code c} is
       * negative, or -2 if {@code c} is greater than 0x10ffff or is a
       * surrogate code point (0xd800 to 0xdfff).
       */
      public int Encode(
       int c,
       IWriter output) {
        if (c < 0) {
          return -1;
        }
        if (c > 0x10ffff || ((c & 0x1ff800) == 0xd800)) {
          return -2;
        }
        int b1, b2;
        if (c <= 0xffff) {
          // Single 16-bit code unit
          b1 = (c >> 8) & 0xff;
          b2 = c & 0xff;
          if (this.bigEndian) {
            output.write((byte)b1);
            output.write((byte)b2);
          } else {
            output.write((byte)b2);
            output.write((byte)b1);
          }
          return 2;
        }
        // Supplementary code point: split into lead (c1) and trail (c2)
        // surrogate code units
        byte[] buf = new byte[4];
        int c1 = ((c - 0x10000) >> 10) + 0xd800;
        int c2 = ((c - 0x10000) & 0x3ff) + 0xdc00;
        b1 = (c1 >> 8) & 0xff;
        b2 = c1 & 0xff;
        if (this.bigEndian) {
          buf[0] = (byte)b1;
          buf[1] = (byte)b2;
        } else {
          buf[0] = (byte)b2;
          buf[1] = (byte)b1;
        }
        b1 = (c2 >> 8) & 0xff;
        b2 = c2 & 0xff;
        if (this.bigEndian) {
          buf[2] = (byte)b1;
          buf[3] = (byte)b2;
        } else {
          buf[2] = (byte)b2;
          buf[3] = (byte)b1;
        }
        output.write(buf, 0, 4);
        return 4;
      }
    }

    /**
     * Creates a UTF-16 decoder of the given kind.
     * @param kind 0 for little endian, 1 for big endian, 2 for unlabeled
     * (byte order determined from the start of the stream). Any value other
     * than 1 or 2 is treated as little endian.
     * @return A new decoder.
     */
    public static ICharacterDecoder GetDecoder2(int kind) {
      // kind: 0-little endian, 1-big endian, 2-unlabeled
      return (kind == 2) ? ((ICharacterDecoder)new EndianFreeDecoder()) :
        ((ICharacterDecoder)new Decoder(kind == 1));
    }

    /**
     * Creates a UTF-16 encoder of the given kind.
     * @param kind 0 for little endian, 1 for big endian, 2 for unlabeled
     * (little endian with a leading byte order mark). Any value other than
     * 1 or 2 is treated as little endian.
     * @return A new encoder.
     */
    public static ICharacterEncoder GetEncoder2(int kind) {
      // kind: 0-little endian, 1-big endian, 2-unlabeled
      return (kind == 2) ? ((ICharacterEncoder)new EndianFreeEncoder()) :
        ((ICharacterEncoder)new Encoder(kind == 1));
    }

    /**
     * Creates a decoder for unlabeled UTF-16.
     * @return A new decoder equivalent to {@code GetDecoder2(2)}.
     */
    public ICharacterDecoder GetDecoder() {
      return GetDecoder2(2);
    }

    /**
     * Creates an encoder for unlabeled UTF-16.
     * @return A new encoder equivalent to {@code GetEncoder2(2)}.
     */
    public ICharacterEncoder GetEncoder() {
      return GetEncoder2(2);
    }
  }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy