All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.jmatchparser.util.charset.AddBOMCharset Maven / Gradle / Ivy

Go to download

A Java-based parser for parsing/grabbing websites and other text or XML documents, based on a nondeterministic parser language, that creates XML output. It also contains a few utility classes for HTML, CSV and text parsing, and additional character sets. The jMatchParser-charset module contains the character sets.

The newest version!
/*
 * Copyright (c) 2010 - 2011 Michael Schierl
 * 
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *   
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 *   
 * - Neither name of the copyright holders nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *   
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package net.sf.jmatchparser.util.charset;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Set;

/**
 * Charset that wraps a base charset and adds byte order mark handling.
 * 
 * When encoding, the character U+FEFF is encoded with the base charset and
 * written before any other output. When decoding, a leading U+FEFF is dropped
 * if present; any other first character is passed through unchanged.
 * 
 * The canonical name and all aliases are those of the base charset with
 * {@link #SUFFIX} appended.
 */
class AddBOMCharset extends Charset {

	/** The byte order mark character that is emitted first and skipped when read first. */
	static final char BOM = '\uFEFF';

	/** Suffix appended to the base charset's name and to each of its aliases. */
	public static final String SUFFIX = "-BOM";

	private final Charset base;

	/**
	 * Creates a BOM-adding wrapper around the given charset.
	 * 
	 * @param base
	 *            charset that performs the actual encoding and decoding
	 */
	protected AddBOMCharset(Charset base) {
		super(base.name() + SUFFIX, buildAliases(base.aliases()));
		this.base = base;
	}

	/**
	 * Builds the alias array for the wrapper by appending {@link #SUFFIX} to
	 * every alias of the base charset.
	 * 
	 * @param aliases
	 *            aliases of the base charset
	 * @return a new array containing one suffixed alias per input alias
	 */
	private static String[] buildAliases(Set<String> aliases) {
		String[] result = new String[aliases.size()];
		int i = 0;
		for (String alias : aliases) {
			result[i] = alias + SUFFIX;
			i++;
		}
		return result;
	}

	/**
	 * Tells whether this charset contains the given charset.
	 * 
	 * Another {@code AddBOMCharset} is contained only if this charset's base
	 * contains the other charset's base; a BOM wrapper does not change which
	 * characters are representable. Any other charset is checked against the
	 * base charset directly.
	 */
	@Override
	public boolean contains(Charset cs) {
		if (cs instanceof AddBOMCharset) {
			return base.contains(((AddBOMCharset) cs).base);
		}
		return base.contains(cs);
	}

	@Override
	public CharsetDecoder newDecoder() {
		return new Decoder(base.newDecoder());
	}

	@Override
	public CharsetEncoder newEncoder() {
		return new Encoder(base.newEncoder());
	}

	/**
	 * Decoder that delegates to a decoder of the base charset and drops a
	 * leading U+FEFF from the decoded output.
	 */
	private class Decoder extends CharsetDecoder {

		private final CharsetDecoder baseDecoder;
		private boolean bomRead = false, flushed = false;

		protected Decoder(CharsetDecoder baseDecoder) {
			super(AddBOMCharset.this, baseDecoder.averageCharsPerByte(), baseDecoder.maxCharsPerByte());
			this.baseDecoder = baseDecoder;
			// The base decoder only reports errors; the resulting CoderResult
			// is returned from decodeLoop so that this decoder's own error
			// actions are the ones that get applied.
			baseDecoder.onMalformedInput(CodingErrorAction.REPORT);
			baseDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
		}

		@Override
		protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
			if (out.remaining() == 0)
				return CoderResult.OVERFLOW;
			if (!bomRead) {
				// Decode into a one-char buffer to look at the first character
				// in isolation.
				CharBuffer bom = CharBuffer.allocate(1);
				CoderResult bomResult = baseDecoder.decode(in, bom, false);
				bom.flip();
				if (bom.remaining() == 1) {
					char c = bom.get();
					// Only a BOM is swallowed; any other first character is
					// regular content.
					if (c != BOM)
						out.put(c);
					bomRead = true;
				}
				if (!bomResult.isOverflow()) {
					// Underflow (no more input yet) or an error: hand it to
					// the caller as is.
					return bomResult;
				}
				// Overflow: either one char was decoded and more input
				// follows, or nothing fit into the one-char buffer. In both
				// cases continue with the real output buffer.
				bomRead = true;
			}
			// End of input is signalled to the base decoder in implFlush.
			return baseDecoder.decode(in, out, false);
		}

		@Override
		protected CoderResult implFlush(CharBuffer out) {
			if (!flushed) {
				// Tell the base decoder that the input has ended, using an
				// empty buffer, and then flush it.
				ByteBuffer empty = ByteBuffer.allocate(1);
				empty.flip();
				CoderResult result = baseDecoder.decode(empty, out, true);
				if (!result.isUnderflow())
					return result;
				result = baseDecoder.flush(out);
				if (!result.isUnderflow())
					return result;
				// Remember the completed flush, mirroring Encoder.implFlush.
				flushed = true;
			}
			return super.implFlush(out);
		}

		@Override
		protected void implReset() {
			baseDecoder.reset();
			bomRead = false;
			flushed = false;
		}
	}

	/**
	 * Encoder that delegates to an encoder of the base charset and encodes a
	 * U+FEFF before any other character.
	 */
	private class Encoder extends CharsetEncoder {

		private final CharsetEncoder baseEncoder;
		private boolean bomWritten = false, flushed = false;

		protected Encoder(CharsetEncoder baseEncoder) {
			// maxBytesPerChar is doubled, presumably so that the BOM emitted
			// together with the first character is covered by the bound.
			super(AddBOMCharset.this, baseEncoder.averageBytesPerChar(), baseEncoder.maxBytesPerChar() * 2, baseEncoder.replacement());
			this.baseEncoder = baseEncoder;
			// The base encoder only reports errors; the resulting CoderResult
			// is returned from encodeLoop so that this encoder's own error
			// actions are the ones that get applied.
			baseEncoder.onMalformedInput(CodingErrorAction.REPORT);
			baseEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
		}

		@Override
		protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
			if (!bomWritten) {
				CharBuffer bom = CharBuffer.allocate(1);
				bom.put(BOM);
				bom.flip();
				CoderResult result = baseEncoder.encode(bom, out, false);
				// If the BOM could not be written, return the result and
				// retry on the next call.
				if (!result.isUnderflow())
					return result;
				bomWritten = true;
			}
			// End of input is signalled to the base encoder in implFlush.
			return baseEncoder.encode(in, out, false);
		}

		@Override
		protected void implReset() {
			baseEncoder.reset();
			bomWritten = false;
			flushed = false;
		}

		@Override
		protected CoderResult implFlush(ByteBuffer out) {
			if (!flushed) {
				// If nothing has been encoded so far, the BOM is still
				// pending and is emitted here; otherwise the buffer stays
				// empty and only signals end of input to the base encoder.
				CharBuffer maybeBom = CharBuffer.allocate(1);
				if (!bomWritten) {
					maybeBom.put(BOM);
				}
				maybeBom.flip();
				CoderResult result = baseEncoder.encode(maybeBom, out, true);
				if (!result.isUnderflow())
					return result;
				bomWritten = true;
				result = baseEncoder.flush(out);
				if (!result.isUnderflow())
					return result;
				flushed = true;
			}
			return super.implFlush(out);
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy