All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.caucho.xml2.readers.Utf8Reader Maven / Gradle / Ivy

/*
 * Copyright (c) 1998-2018 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *   Free SoftwareFoundation, Inc.
 *   59 Temple Place, Suite 330
 *   Boston, MA 02111-1307  USA
 *
 * @author Scott Ferguson
 */

package com.caucho.xml2.readers;

import com.caucho.util.CharBuffer;
import com.caucho.vfs.ReadStream;
import com.caucho.xml2.XmlParser;

import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;

/**
 * A fast reader to convert UTF-8 encoded bytes to characters for parsing XML.
 *
 * <p>Only one-, two- and three-byte UTF-8 sequences are decoded; any other
 * lead byte (including four-byte sequences and stray continuation bytes)
 * is rejected with a {@link CharConversionException}.
 *
 * <p>Line endings are normalized while reading: a lone CR and a CR LF pair
 * are both returned as a single {@code '\n'}, and the parser's line number
 * is advanced for every line ending seen.
 */
public class Utf8Reader extends XmlReader {
  /**
   * Create a new reader.
   */
  public Utf8Reader()
  {
  }

  /**
   * Create a new reader with the given read stream.
   *
   * @param parser the parser that receives line updates and pushed-back
   *               lookahead characters
   * @param is the byte stream to decode
   */
  public Utf8Reader(XmlParser parser, ReadStream is)
  {
    super(parser, is);
  }

  /**
   * Read the next character, returning -1 on end of file.
   *
   * @return the decoded character, {@code '\n'} for any CR, LF or CR LF
   *         line ending, or -1 at end of file
   * @throws EOFException if the stream ends inside a multi-byte sequence
   * @throws CharConversionException if the bytes are not a supported
   *         UTF-8 sequence
   * @throws IOException if the underlying stream fails
   */
  public int read()
    throws IOException
  {
    int ch1 = _is.read();

    if (ch1 == '\n') {
      _parser.setLine(++_line);
      return ch1;
    }
    else if (ch1 == '\r') {
      _parser.setLine(++_line);

      int ch2 = _is.read();
      if (ch2 == '\n')
        return '\n';

      // The byte after a lone CR belongs to the next character: decode it
      // fully (it may start a multi-byte sequence) before pushing it back.
      int next = ch2;
      if (next >= 0x80)
        next = readSecond(next);

      // Never push back end-of-file.  readSecond() can itself yield -1 when
      // it skips a U+FEFF and the stream ends right after it, so the guard
      // has to cover the decoded value, not just the raw byte.
      if (next >= 0)
        _parser.unread(next);

      return '\n';
    }
    else if (ch1 < 0x80) {
      // plain ASCII, or -1 at end of file
      return ch1;
    }
    else
      return readSecond(ch1);
  }

  /**
   * Decodes the remainder of a multi-byte UTF-8 sequence whose lead byte
   * has already been read.
   *
   * @param ch1 the lead byte (0x80 or above)
   * @return the decoded character; when the sequence decodes to U+FEFF it
   *         is skipped and the result of the next {@link #read()} is
   *         returned instead, which may be -1 at end of file
   * @throws EOFException if the stream ends inside the sequence
   * @throws CharConversionException if a continuation byte is malformed or
   *         the lead byte does not start a two- or three-byte sequence
   * @throws IOException if the underlying stream fails
   */
  private int readSecond(int ch1)
    throws IOException
  {
    if ((ch1 & 0xe0) == 0xc0) {
      // two-byte sequence: 110xxxxx 10xxxxxx
      int ch2 = _is.read();
      if (ch2 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch2 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding {0}", hex(ch1)));

      return ((ch1 & 0x1f) << 6) + (ch2 & 0x3f);
    }
    else if ((ch1 & 0xf0) == 0xe0) {
      // three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
      // Both trailing bytes are read up front so that the error messages
      // below can report the complete sequence.
      int ch2 = _is.read();
      int ch3 = _is.read();

      if (ch2 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch2 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding at {0} {1} {2}", hex(ch1), hex(ch2), hex(ch3)));

      if (ch3 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch3 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding {0} {1} {2}",
                        hex(ch1), hex(ch2), hex(ch3)));

      // The lead byte matched 1110xxxx, so bit 4 is clear and the 0x1f mask
      // keeps exactly the four payload bits.
      int ch = ((ch1 & 0x1f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f);

      if (ch == 0xfeff) // handle some writers, e.g. microsoft
        return read();
      else
        return ch;
    }
    else
      throw error(L.l("illegal utf8 encoding at {0}", hex(ch1)));
  }

  /**
   * Formats the low eight bits of {@code n} as {@code 0x} followed by two
   * lowercase hexadecimal digits, for use in error messages.
   *
   * @param n the byte value; only the low eight bits are used, so an
   *          end-of-file marker of -1 is rendered as {@code 0xff}
   * @return the formatted value, e.g. {@code 0xc3}
   */
  private String hex(int n)
  {
    int b = n & 0xff;

    return "0x" + Character.forDigit(b >> 4, 16) + Character.forDigit(b & 0x0f, 16);
  }

  /**
   * Builds the exception used to report a malformed byte sequence,
   * prefixing the message with {@code filename:line: } when the parser
   * reports a filename.
   *
   * @param msg the description of the encoding problem
   * @return the exception for the caller to throw
   */
  private CharConversionException error(String msg)
  {
    String filename = _parser.getFilename();
    int line = _parser.getLine();

    if (filename != null)
      return new CharConversionException(filename + ":" + line + ": " + msg);
    else
      return new CharConversionException(msg);
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy