
// com.gs.obevo.util.vfs.DetectCharsetStrategy (Maven / Gradle / Ivy)
/**
* Copyright 2017 Goldman Sachs.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.gs.obevo.util.vfs;
import java.nio.charset.Charset;
import org.mozilla.universalchardet.UniversalDetector;
/**
 * Detects the charset of some input bytes, or returns null if unable to detect.
 * Leverages the Mozilla universalchardet library: https://code.google.com/archive/p/juniversalchardet/
 */
class DetectCharsetStrategy implements CharsetStrategy {
    /**
     * Per-thread detector instance. Each call to {@link #determineCharset(byte[])} feeds data into
     * the detector and resets it afterwards, so an instance carries state for the duration of a
     * call; keeping one per thread lets the instance be reused without sharing it across threads.
     *
     * <p>The type argument is required: with a raw {@code ThreadLocal}, {@code get()} returns
     * {@code Object} and cannot be assigned to a {@code UniversalDetector} without a cast.
     */
    private static final ThreadLocal<UniversalDetector> charsetDetector =
            new ThreadLocal<UniversalDetector>() {
                @Override
                protected UniversalDetector initialValue() {
                    return new UniversalDetector(null);
                }
            };

    /**
     * Runs the charset detector over the whole of {@code bytes}.
     *
     * @param bytes the raw content to inspect; must not be null (its length is read unconditionally)
     * @return the detected charset, or {@code null} if the detector did not report an encoding
     * @throws java.nio.charset.IllegalCharsetNameException if the reported encoding name is not a
     *         legal charset name (propagated from {@link Charset#forName(String)})
     * @throws java.nio.charset.UnsupportedCharsetException if the reported encoding is not supported
     *         by this JVM (propagated from {@link Charset#forName(String)})
     */
    @Override
    public Charset determineCharset(byte[] bytes) {
        UniversalDetector detector = charsetDetector.get();
        try {
            detector.handleData(bytes, 0, bytes.length);
            // Signal end of input before asking for the result.
            detector.dataEnd();

            String encoding = detector.getDetectedCharset();
            if (encoding != null) {
                return Charset.forName(encoding);
            }
            return null;
        } finally {
            // Always reset, even on failure, so the thread's detector is clean for its next use.
            detector.reset();
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy