// info.monitorenter.cpdetector.io.CodepageDetectorProxy Maven / Gradle / Ivy
/*
* ${file_name} of project cpdetector,
* Copyright ${year} (C) Achim Westermann, created on 03.06.2004.
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this collection are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the cpDetector code in [sub] packages info.monitorenter and
* cpdetector.
*
* The Initial Developer of the Original Code is
* Achim Westermann .
*
* Portions created by the Initial Developer are Copyright (c) 2007
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** *
*/
package info.monitorenter.cpdetector.io;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
/**
*
* A proxy that delegate the codepage detection to all it's delegates. The first
* one (added in code-order) that does not return a null {@link java.nio.charset.Charset}from
* it's delegate method {@link #detectCodepage(java.net.URL)}wins the race and determines
* the codpage of the document specified by the given URL.
*
*
* If an underlying {@link info.monitorenter.cpdetector.io.ICodepageDetector}throws an
* {@link java.io.IOException}, the delegation search will be terminated by
* throwing this exception.
*
*
* @author Achim Westermann
*
*/
public final class CodepageDetectorProxy extends AbstractCodepageDetector {
/** serialVersionUID */
private static final long serialVersionUID = -7389424614984024701L;
/**
* Singleton instance.
*/
private static CodepageDetectorProxy instance = null;
/**
* The set of {@link info.monitorenter.cpdetector.io.ICodepageDetector}instances that this proxy will delegate
* to. These instances will be invoked in order to find the codepage until the
* first instance returns a valid codepage. If an {@link java.io.IOException}is thrown
* the search will terminate early (assuming that the execption is related to
* a general problem with the given URL.
*/
private Set detectors = new LinkedHashSet();
/**
* Singleton constructor. For internal use only.
*/
private CodepageDetectorProxy() {
super();
}
/**
* Singleton retrieval method.
*
* Be sure to configure the instance returned at a single location in your
* code to avoid unpredictable application - wide side effects.
*
*
* @return the sole instance in this VM.
*/
public static CodepageDetectorProxy getInstance() {
if (CodepageDetectorProxy.instance == null) {
CodepageDetectorProxy.instance = new CodepageDetectorProxy();
}
return CodepageDetectorProxy.instance;
}
/**
* Adds the given instance to this proxie's detection capability.
*
*
* Remember that the order of added ICodepageDetector instances is important
* for the internal delegation (see class description).
*
*
*/
public boolean add(ICodepageDetector detector) {
return this.detectors.add(detector);
}
/**
* @param url
* Should link to a file containing textual document. No check for
* images or other resources is made.
* @throws java.io.IOException
* If a problem with the url - handling occurs.
*/
public Charset detectCodepage(final URL url) throws IOException {
Charset ret = null;
Iterator detectorIt = this.detectors.iterator();
while (detectorIt.hasNext()) {
ret = detectorIt.next().detectCodepage(url);
if (ret != null) {
if (ret != UnknownCharset.getInstance()) {
if (ret instanceof UnsupportedCharset) {
// TODO: Debug logging: found illegal charset tag or encoding
// declaration.
} else {
break;
}
}
}
}
return ret;
}
/**
*
* Detects the codepage by iteratively delegating the call to all internal
* {@link info.monitorenter.cpdetector.io.ICodepageDetector} instances added by
* {@link #add(info.monitorenter.cpdetector.io.ICodepageDetector)}.
*
*
* The given InputStream has to support mark such that the call
* {@link java.io.InputStream#mark(int)} with argument length does not throw an
* exception. This is needed, as the stream has to be resetted to the
* beginning for each internal delegate that tries to detect.
*
*
* If this is impossible (large documents), prefer using
* {@link #detectCodepage(java.net.URL)}.
*
*
* @param in
* An InputStream for the document, that supports mark and a
* readlimit of argument length.
*
* @param length
* The amount of bytes to take into account. This number shouls not
* be longer than the amount of bytes retrievable from the
* InputStream but should be as long as possible to give the fallback
* detection (chardet) more hints to guess.
*
*
* @throws IllegalArgumentException
* if more bytes had to be read from the input stream than param
* length or the given input stream does not support marking.
*/
public Charset detectCodepage(final InputStream in, final int length) throws IOException, IllegalArgumentException {
if (!in.markSupported()) {
throw new IllegalArgumentException("The given input stream (" + in.getClass().getName() + ") has to support for marking.");
}
Charset ret = null;
int markLimit = length;
Iterator detectorIt = this.detectors.iterator();
while (detectorIt.hasNext()) {
in.mark(markLimit);
ret = detectorIt.next().detectCodepage(in, length);
// if more bytes have been read than marked (length) this will throw an
// exception:
try {
in.reset();
} catch (IOException ioex) {
IllegalStateException ise = new IllegalStateException(
"More than the given length had to be read and the given stream could not be reset. Undetermined state for this detection.");
ise.initCause(ioex);
throw ise;
}
if (ret != null) {
if (ret != UnknownCharset.getInstance()) {
if (ret instanceof UnsupportedCharset) {
// TODO: Debug logging: found illegal charset tag or encoding
// declaration.
} else {
break;
}
}
}
}
return ret;
}
/**
* @see Object#toString()
*/
public String toString() {
StringBuffer ret = new StringBuffer();
Iterator it = this.detectors.iterator();
int i = 1;
while (it.hasNext()) {
ret.append(i);
ret.append(") ");
ret.append(it.next().getClass().getName());
ret.append("\n");
i++;
}
return ret.toString();
}
}