All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ar.com.zauber.leviathan.impl.httpclient.charset.DetectorCharsetStrategy Maven / Gradle / Ivy

There is a newer version: 1.0.2
Show newest version
/**
 * Copyright (c) 2009-2014 Zauber S.A. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ar.com.zauber.leviathan.impl.httpclient.charset;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;

import org.mozilla.universalchardet.UniversalDetector;

import ar.com.zauber.leviathan.common.CharsetStrategy;
import ar.com.zauber.leviathan.common.ResponseMetadata;


/**
 * {@link CharsetStrategy} implementation that inspects the content of the
 * document to try to determine its encoding.
 * Returns <code>null</code> when the encoding cannot be determined.
 *
 * @author Mariano Focaraccio
 * @since Sep 16, 2010
 */
public class DetectorCharsetStrategy implements CharsetStrategy {

    /** Size, in bytes, of the chunks read from the content stream. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Feeds the content to a {@link UniversalDetector} until the detector
     * reports it is done or the stream stops yielding data, then maps the
     * detected charset name to a {@link Charset}.
     *
     * The stream is consumed but not closed by this method.
     *
     * @param meta    response metadata; not used by this strategy
     * @param content stream with the document bytes to analyze
     * @return the detected charset, or <code>null</code> if none was
     *         detected, if the detected name is not a charset this JVM can
     *         resolve, or if reading the content failed
     * @see CharsetStrategy#getCharset(ResponseMetadata, InputStream)
     */
    public final Charset getCharset(final ResponseMetadata meta,
                                    final InputStream content) {

        final UniversalDetector detector = new UniversalDetector(null);
        final byte[] buf = new byte[BUFFER_SIZE];
        try {
            int nread;
            // Ask the detector first: once it is done there is no point in
            // performing another (possibly blocking) read whose data would
            // be thrown away.
            while (!detector.isDone() && (nread = content.read(buf)) > 0) {
                detector.handleData(buf, 0, nread);
            }
            detector.dataEnd();
            return toCharset(detector.getDetectedCharset());
        } catch (IOException e) {
            // Best effort: a read failure means the charset is unknown.
            // NOTE(review): consider routing this through the project's
            // logging facade instead of stderr.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Resolves a charset name reported by the detector.
     *
     * @param name detected charset name, may be <code>null</code>
     * @return the matching {@link Charset}, or <code>null</code> when the
     *         name is <code>null</code>, illegal or not supported by this JVM
     */
    private static Charset toCharset(final String name) {
        if (name == null) {
            return null;
        }
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException ignored) {
            // Covers IllegalCharsetNameException and
            // UnsupportedCharsetException: the detector may report a name
            // this JVM cannot resolve; treat that as "not determined".
            return null;
        }
    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy