All downloads are free. The search and download functionality uses the official Maven repository.

de.tudarmstadt.ukp.wikipedia.revisionmachine.difftool.data.codec.RevisionEncoder Maven / Gradle / Ivy

There is a newer version: 1.1.0
Show newest version
/*******************************************************************************
 * Copyright (c) 2011 Ubiquitous Knowledge Processing Lab
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v3
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * Project Website:
 * 	http://jwpl.googlecode.com
 *
 * Contributors:
 * 	Torsten Zesch
 * 	Simon Kulessa
 * 	Oliver Ferschke
 ******************************************************************************/
package de.tudarmstadt.ukp.wikipedia.revisionmachine.difftool.data.codec;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.zip.Deflater;

import org.apache.commons.codec.binary.Base64;

import de.tudarmstadt.ukp.wikipedia.revisionmachine.common.exceptions.ConfigurationException;
import de.tudarmstadt.ukp.wikipedia.revisionmachine.common.exceptions.EncodingException;
import de.tudarmstadt.ukp.wikipedia.revisionmachine.difftool.config.ConfigurationKeys;
import de.tudarmstadt.ukp.wikipedia.revisionmachine.difftool.config.ConfigurationManager;
import de.tudarmstadt.ukp.wikipedia.revisionmachine.difftool.data.tasks.content.Diff;
import de.tudarmstadt.ukp.wikipedia.revisionmachine.difftool.data.tasks.content.DiffPart;

/**
 * The RevisionEncoder class contains methods to encode the diff information.
 *
 * The encoder writes a codec header followed by one record per
 * {@link DiffPart}. Depending on the configuration, the encoded bytes are
 * additionally deflate-compressed.
 *
 * This class is not thread-safe: each encoding run stores its
 * {@link BitWriter} and codec in instance fields.
 */
public class RevisionEncoder
	implements RevisionEncoderInterface
{

	/** Size of the temporary buffer used while deflating. */
	private static final int DEFLATE_BUFFER_SIZE = 1000;

	/** First byte of a compressed binary encoding. */
	private static final byte BINARY_COMPRESSION_MARKER = -128;

	/** Prefix of a compressed Base64 encoding. */
	private static final String STRING_COMPRESSION_MARKER = "_";

	/** Reference to the codec */
	private RevisionCodecData codecData;

	/** Reference to the BitWriter */
	private BitWriter data;

	/** Configuration Parameter - Zip Compression */
	private final boolean MODE_ZIP_COMPRESSION;

	/** Configuration Parameter - Wikipedia Encoding */
	private final String WIKIPEDIA_ENCODING;

	/**
	 * (Constructor) Creates a new RevisionEncoder object.
	 *
	 * @throws ConfigurationException
	 *             if an error occurs while accessing the configuration
	 *             parameters
	 */
	public RevisionEncoder()
		throws ConfigurationException
	{

		ConfigurationManager config = ConfigurationManager.getInstance();

		WIKIPEDIA_ENCODING = (String) config
				.getConfigParameter(ConfigurationKeys.WIKIPEDIA_ENCODING);

		MODE_ZIP_COMPRESSION = (Boolean) config
				.getConfigParameter(ConfigurationKeys.MODE_ZIP_COMPRESSION_ENABLED);
	}

	/**
	 * Creates the binary encoding of the diff.
	 *
	 * If zip compression is enabled and the compressed form is not larger
	 * than the uncompressed encoding plus one byte, the result is a marker
	 * byte (-128) followed by the deflated data. Otherwise the uncompressed
	 * encoding is returned.
	 *
	 * @param codecData
	 *            codec
	 * @param diff
	 *            diff
	 * @return binary data
	 *
	 * @throws UnsupportedEncodingException
	 *             if the character encoding is unsupported
	 * @throws EncodingException
	 *             if the encoding failed
	 *
	 * @see RevisionEncoderInterface#binaryDiff(RevisionCodecData, Diff)
	 */
	@Override
	public byte[] binaryDiff(final RevisionCodecData codecData, final Diff diff)
		throws UnsupportedEncodingException, EncodingException
	{

		byte[] bData = encode(codecData, diff);
		if (!MODE_ZIP_COMPRESSION) {
			return bData;
		}

		byte[] output = deflate(bData);

		// Threshold kept exactly as before so that the emitted format does
		// not change for existing data.
		if (bData.length + 1 < output.length) {
			return bData;
		}

		byte[] marked = new byte[output.length + 1];
		marked[0] = BINARY_COMPRESSION_MARKER;
		System.arraycopy(output, 0, marked, 1, output.length);

		return marked;
	}

	/**
	 * Compresses the input with the default {@link Deflater} settings.
	 *
	 * The loop runs until the deflater reports that the stream is finished,
	 * and the deflater is always released afterwards.
	 *
	 * @param input
	 *            data to compress
	 * @return deflated data
	 */
	private static byte[] deflate(final byte[] input)
	{

		Deflater compresser = new Deflater();
		try {
			compresser.setInput(input);
			compresser.finish();

			byte[] buffer = new byte[DEFLATE_BUFFER_SIZE];
			ByteArrayOutputStream stream = new ByteArrayOutputStream();

			while (!compresser.finished()) {
				int cLength = compresser.deflate(buffer);
				stream.write(buffer, 0, cLength);
			}

			return stream.toByteArray();
		}
		finally {
			// Frees the native zlib resources held by the deflater.
			compresser.end();
		}
	}

	/**
	 * Creates the binary encoding of the diff while using the codec
	 * information.
	 *
	 * @param codecData
	 *            codec
	 * @param diff
	 *            diff
	 * @return binary data
	 *
	 * @throws UnsupportedEncodingException
	 *             if the character encoding is unsupported
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private byte[] encode(final RevisionCodecData codecData, final Diff diff)
		throws UnsupportedEncodingException, EncodingException
	{

		this.data = new BitWriter(codecData.totalSizeInBits());
		encodeCodecData(codecData);

		DiffPart part;

		Iterator<DiffPart> partIt = diff.iterator();
		while (partIt.hasNext()) {
			part = partIt.next();

			switch (part.getAction()) {
			case FULL_REVISION_UNCOMPRESSED:
				encodeFullRevisionUncompressed(part);
				break;
			case INSERT:
				encodeInsert(part);
				break;
			case DELETE:
				encodeDelete(part);
				break;
			case REPLACE:
				encodeReplace(part);
				break;
			case CUT:
				encodeCut(part);
				break;
			case PASTE:
				encodePaste(part);
				break;
			// FULL_REVISION_COMPRESSED has no encoder here and ends up in
			// the default branch.
			default:
				throw new RuntimeException("Unsupported diff action: "
						+ part.getAction());
			}
		}

		return data.toByteArray();
	}

	/**
	 * Encodes the codecData and remembers it for the following operations.
	 *
	 * @param codecData
	 *            Reference to the codec
	 *
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void encodeCodecData(final RevisionCodecData codecData)
		throws EncodingException
	{

		this.codecData = codecData;

		// C - operation code 000
		data.writeBit(0);
		data.writeBit(0);
		data.writeBit(0);

		// BLOCK SIZES - S E B L
		// NOTE(review): the first argument is presumably the number of bits
		// used for the value - confirm against BitWriter.
		this.data.writeValue(5, codecData.getBlocksizeS());
		this.data.writeValue(5, codecData.getBlocksizeE());
		this.data.writeValue(5, codecData.getBlocksizeB());
		this.data.writeValue(5, codecData.getBlocksizeL());

		// 1 Bit
		data.writeFillBits();
	}

	/**
	 * Encodes a Cut operation.
	 *
	 * @param part
	 *            Reference to the Cut operation
	 *
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void encodeCut(final DiffPart part)
		throws EncodingException
	{

		// C - operation code 101
		data.writeBit(1);
		data.writeBit(0);
		data.writeBit(1);

		// S - start of the part
		data.writeValue(codecData.getBlocksizeS(), part.getStart());

		// E - length of the part
		data.writeValue(codecData.getBlocksizeE(), part.getLength());

		// B - number parsed from the text of the part
		data.writeValue(codecData.getBlocksizeB(),
				Integer.parseInt(part.getText()));

		data.writeFillBits();

	}

	/**
	 * Encodes a Delete operation.
	 *
	 * @param part
	 *            Reference to the Delete operation
	 *
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void encodeDelete(final DiffPart part)
		throws EncodingException
	{

		// C - operation code 011
		data.writeBit(0);
		data.writeBit(1);
		data.writeBit(1);

		// S - start of the part
		data.writeValue(codecData.getBlocksizeS(), part.getStart());

		// E - length of the part
		data.writeValue(codecData.getBlocksizeE(), part.getLength());

		data.writeFillBits();
	}

	/**
	 * Creates the Base64 encoding of the diff.
	 *
	 * If zip compression is enabled and the compressed form is not larger
	 * than the uncompressed encoding plus one byte, the result is "_"
	 * followed by the Base64 form of the deflated data. Otherwise the Base64
	 * form of the uncompressed encoding is returned.
	 *
	 * @param codecData
	 *            codec
	 * @param diff
	 *            diff
	 * @return Base64 encoded diff
	 *
	 * @throws UnsupportedEncodingException
	 *             if the character encoding is unsupported
	 * @throws EncodingException
	 *             if the encoding failed
	 *
	 * @see RevisionEncoderInterface#encodeDiff(RevisionCodecData, Diff)
	 */
	@Override
	public String encodeDiff(final RevisionCodecData codecData, final Diff diff)
		throws UnsupportedEncodingException, EncodingException
	{

		byte[] bData = encode(codecData, diff);
		if (!MODE_ZIP_COMPRESSION) {
			return Base64.encodeBase64String(bData);
		}

		byte[] output = deflate(bData);

		// Threshold kept exactly as before so that the emitted format does
		// not change for existing data.
		if (bData.length + 1 < output.length) {
			return Base64.encodeBase64String(bData);
		}

		return STRING_COMPRESSION_MARKER + Base64.encodeBase64String(output);
	}

	/**
	 * Encodes a FullRevision operation.
	 *
	 * @param part
	 *            Reference to the FullRevision operation
	 *
	 * @throws UnsupportedEncodingException
	 *             if the character encoding is unsupported
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void encodeFullRevisionUncompressed(final DiffPart part)
		throws UnsupportedEncodingException, EncodingException
	{

		// C - operation code 001
		data.writeBit(0);
		data.writeBit(0);
		data.writeBit(1);

		// L T
		writeText(part);

	}

	/**
	 * Encodes an Insert operation.
	 *
	 * @param part
	 *            Reference to the Insert operation
	 *
	 * @throws UnsupportedEncodingException
	 *             if the character encoding is unsupported
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void encodeInsert(final DiffPart part)
		throws UnsupportedEncodingException, EncodingException
	{

		// C - operation code 010
		data.writeBit(0);
		data.writeBit(1);
		data.writeBit(0);

		// S - start of the part
		data.writeValue(codecData.getBlocksizeS(), part.getStart());

		// L T
		writeText(part);
	}

	/**
	 * Encodes a Paste operation.
	 *
	 * @param part
	 *            Reference to the Paste operation
	 *
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void encodePaste(final DiffPart part)
		throws EncodingException
	{

		// C - operation code 110
		data.writeBit(1);
		data.writeBit(1);
		data.writeBit(0);

		// S - start of the part
		data.writeValue(codecData.getBlocksizeS(), part.getStart());

		// B - number parsed from the text of the part
		data.writeValue(codecData.getBlocksizeB(),
				Integer.parseInt(part.getText()));

		data.writeFillBits();
	}

	/**
	 * Encodes a Replace operation.
	 *
	 * @param part
	 *            Reference to the replace operation
	 *
	 * @throws UnsupportedEncodingException
	 *             if the character encoding is unsupported
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void encodeReplace(final DiffPart part)
		throws UnsupportedEncodingException, EncodingException
	{

		// C - operation code 100
		data.writeBit(1);
		data.writeBit(0);
		data.writeBit(0);

		// S - start of the part
		data.writeValue(codecData.getBlocksizeS(), part.getStart());

		// E - length of the part
		data.writeValue(codecData.getBlocksizeE(), part.getLength());

		// L T
		writeText(part);
	}

	/**
	 * Writes the text of the part: first the number of bytes of the text in
	 * the configured Wikipedia encoding (L), then the bytes themselves (T).
	 *
	 * @param part
	 *            Reference to the operation that carries the text
	 *
	 * @throws UnsupportedEncodingException
	 *             if the character encoding is unsupported
	 * @throws EncodingException
	 *             if the encoding failed
	 */
	private void writeText(final DiffPart part)
		throws UnsupportedEncodingException, EncodingException
	{

		String text = part.getText();
		byte[] bText = text.getBytes(WIKIPEDIA_ENCODING);

		data.writeValue(codecData.getBlocksizeL(), bText.length);
		data.write(bText);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy