All downloads are free. The search and download functionality uses the official Maven repository.

org.netbeans.modules.diff.XMLEncodingHelper Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.netbeans.modules.diff;

import org.openide.ErrorManager;

import java.io.*;

/**
 * XML uses inband encoding detection - this class obtains it.
 *
 * 

Copy&pasted from taslist/api/.../XMLEncodingHelper * * @author Petr Kuzel * @version 1.0 */ final class XMLEncodingHelper extends Object { // // taken from XML module xml.core.lib.EncodingHelper // // heuristic constant guessing max prolog length private static final int EXPECTED_PROLOG_LENGTH = 1000; /** Detect input stream encoding. * The stream stays intact. * @return iana encoding names or Java hisrotical ("UTF8", "ASCII", etc.) or null * if the stream is not markable or enoding cannot be detected. */ public static String detectEncoding(InputStream in) throws IOException { if (! in.markSupported()) { ErrorManager.getDefault().log("XMLEncodingHelper got unmarkable stream: " + in.getClass()); // NOI18N return null; } try { in.mark(EXPECTED_PROLOG_LENGTH); byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH]; for (int i = 0; inull for unrecognized */ static String autoDetectEncoding(byte[] buf) throws IOException { if (buf.length >= 4) { switch (buf[0]) { case 0: // byte order mark of (1234-big endian) or (2143) USC-4 // or '<' encoded as UCS-4 (1234, 2143, 3412) or UTF-16BE if (buf[1] == (byte)0x3c && buf[2] == (byte)0x00 && buf[3] == (byte)0x3f) { return "UnicodeBigUnmarked"; // NOI18N } // else it's probably UCS-4 break; case 0x3c: switch (buf[1]) { // First character is '<'; could be XML without // an XML directive such as "", "