org.netbeans.modules.diff.XMLEncodingHelper Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.netbeans.modules.diff;
import org.openide.ErrorManager;
import java.io.*;
/**
* XML uses inband encoding detection - this class obtains it.
*
* Copied and pasted from tasklist/api/.../XMLEncodingHelper
*
* @author Petr Kuzel
* @version 1.0
*/
final class XMLEncodingHelper extends Object {
//
// taken from XML module xml.core.lib.EncodingHelper
//
// Heuristic guess at the maximum length of the XML prolog.
// detectEncoding uses it both as the read limit passed to InputStream.mark()
// and as the size of the byte buffer it reads the start of the stream into.
private static final int EXPECTED_PROLOG_LENGTH = 1000;
/** Detect input stream encoding.
* The stream stays intact.
* @return IANA encoding names or Java historical ("UTF8", "ASCII", etc.) or null
* if the stream is not markable or encoding cannot be detected.
*/
public static String detectEncoding(InputStream in) throws IOException {
if (! in.markSupported()) {
ErrorManager.getDefault().log("XMLEncodingHelper got unmarkable stream: " + in.getClass()); // NOI18N
return null;
}
try {
in.mark(EXPECTED_PROLOG_LENGTH);
byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH];
for (int i = 0; inull for unrecognized
*/
static String autoDetectEncoding(byte[] buf) throws IOException {
if (buf.length >= 4) {
switch (buf[0]) {
case 0:
// byte order mark of (1234-big endian) or (2143) UCS-4
// or '<' encoded as UCS-4 (1234, 2143, 3412) or UTF-16BE
if (buf[1] == (byte)0x3c && buf[2] == (byte)0x00 && buf[3] == (byte)0x3f) {
return "UnicodeBigUnmarked"; // NOI18N
}
// else it's probably UCS-4
break;
case 0x3c:
switch (buf[1]) {
// First character is '<'; could be XML without
// an XML directive such as "", "