All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.jwat.tools.tasks.containermd.ContainerMDWrapper Maven / Gradle / Ivy

Go to download

JWAT-Tools uses the available JWAT libraries to make high level tasks available either from command-line or programmatically. Common tasks include: Test, Compress, Decompress, CDX, Arc2Warc. More specialised tasks include: Changed, ContainerMD, Delete, Extract, Interval, PathIndex, Unpack, Headers2CDX.

There is a newer version: 0.7.1
Show newest version
package org.jwat.tools.tasks.containermd;

import java.text.ParseException;
import java.util.Collection;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;

/**
 * A wrapper for the data needed to build the containerMD section of the
 * manifest. Data are gathered during the parse of the archive file and
 * aggregated.
 * <p>
 * Every aggregate is a sorted map from a string key to the
 * {@link ContainerMDElement} holding the counters for that key. This class
 * performs no synchronization of its own.
 */
public final class ContainerMDWrapper {
	/** Encoding elements, keyed by encoding method. */
	public SortedMap<String, ContainerMDElement> encodings;
	/** Block format elements, keyed by format name. */
	public SortedMap<String, ContainerMDElement> blockFormats;
	/** Payload format elements, keyed by format name. */
	public SortedMap<String, ContainerMDElement> payloadFormats;
	/** Declared mime type elements, keyed by mime type. */
	public SortedMap<String, ContainerMDElement> declaredMimeTypes;
	/** Host elements, keyed by host name. */
	public SortedMap<String, ContainerMDElement> hosts;
	/** Response elements, keyed by "protocolName|protocolVersion|codeResponse". */
	public SortedMap<String, ContainerMDElement> responses;

	/** Number of entries registered through {@link #addEntry}. */
	private long number = 0;
	/** Earliest registered date value, or -1 while none has been registered. */
	private long firstDateTime = -1L;
	/** Latest registered date value, or -1 while none has been registered. */
	private long lastDateTime = -1L;

	/** Smallest registered size; stays at Long.MAX_VALUE until a size is registered. */
	private long minimumSize = Long.MAX_VALUE;
	/** Largest registered size. */
	private long maximumSize = 0L;
	/** Sum of all registered non-negative sizes. */
	private long globalSize = 0L;

	/**
	 * Creates a new ContainerMDWrapper instance with empty aggregates.
	 */
	public ContainerMDWrapper() {
		this.encodings = new TreeMap<String, ContainerMDElement>();

		this.blockFormats = new TreeMap<String, ContainerMDElement>();
		this.payloadFormats = new TreeMap<String, ContainerMDElement>();
		this.declaredMimeTypes = new TreeMap<String, ContainerMDElement>();
		this.hosts = new TreeMap<String, ContainerMDElement>();
		this.responses = new TreeMap<String, ContainerMDElement>();
	}

	/**
	 * Register a new entry of an ARC/WARC file.
	 * <p>
	 * The entry counter is always incremented. Size statistics are updated
	 * only for non-negative sizes, date statistics only when
	 * {@code ContainerMDUtils.verifyDate} returns something other than -1,
	 * and host/response statistics only when the source name is set and
	 * matched by {@code ContainerMDUtils.HOST_EXTRACTOR}.
	 * 
	 * @param sourceName
	 *            the entry source, from which protocol and host are extracted
	 * @param size
	 *            the entry size; it is handed unchanged to the per-key
	 *            handlers, even when negative
	 * @param dateTime
	 *            the entry date, checked with {@code ContainerMDUtils.verifyDate}
	 * @param blockFormat
	 *            the block format, passed through
	 *            {@code ContainerMDUtils.verifyMimeType} before being recorded
	 * @param declaredMimeType
	 *            the declared mime type, passed through
	 *            {@code ContainerMDUtils.verifyMimeType} before being recorded
	 * @param payloadFormat
	 *            the payload format, recorded as given
	 * @param protocolVersion
	 *            the protocol version recorded with the response
	 * @param codeResponse
	 *            the response code recorded with the response
	 */
	public void addEntry(String sourceName, long size, String dateTime,
			String blockFormat, String declaredMimeType, String payloadFormat,
			String protocolVersion,
			String codeResponse) {
		number++;
		if (size >= 0L) {
			this.setMaximumSize(size);
			this.setMinimumSize(size);
			this.setGlobalSize(size);
		}

		long dateInSeconds = ContainerMDUtils.verifyDate(dateTime);
		if (dateInSeconds != -1L) {
			this.setFirstDateTime(dateInSeconds);
			this.setLastDateTime(dateInSeconds);
		}
		// Handle the sourceName
		if (ContainerMDUtils.isSet(sourceName)) {
			Matcher m = ContainerMDUtils.HOST_EXTRACTOR.matcher(sourceName);
			if (m.matches()) {
				String protocol = m.group(1);
				String hostName = m.group(2);
				int index = hostName.lastIndexOf('@');
				if (index != -1) {
					// hostname with @, keep only last part
					hostName = hostName.substring(index + 1);
				}
				// hostname with &, replace with %26 (literal replacement,
				// no regular expression needed)
				hostName = hostName.replace("&", "%26");

				this.handleHost(hostName, size);
				this.handleResponse(protocolVersion, protocol, codeResponse,
						size);
			}
		}

		// Handle the mimetype
		this.handleBlockFormat(ContainerMDUtils.verifyMimeType(blockFormat), size);
		this.handleDeclaredMimeType(ContainerMDUtils.verifyMimeType(declaredMimeType), size);
		this.handlePayloadFormat(payloadFormat, size);
	}

	/**
	 * Gets number of records
	 * 
	 * @return the number of entries registered so far
	 */
	public long getNumber() {
		return number;
	}

	/**
	 * Raises the maximum size to the given size when that size is larger.
	 * 
	 * @param size
	 *            the candidate maximum
	 */
	public void setMaximumSize(long size) {
		if (this.maximumSize < size) {
			this.maximumSize = size;
		}
	}

	/**
	 * Gets maximumSize
	 * 
	 * @return the maximum size as a decimal string
	 */
	public String getMaximumSize() {
		return Long.toString(this.maximumSize);
	}

	/**
	 * Lowers the minimum size to the given size when that size is
	 * non-negative and smaller than the current minimum.
	 * 
	 * @param size
	 *            the candidate minimum
	 */
	public void setMinimumSize(long size) {
		if ((size >= 0L) && (this.minimumSize > size)) {
			this.minimumSize = size;
		}
	}

	/**
	 * Adds the given size to the global size; negative sizes are ignored.
	 * 
	 * @param size
	 *            the size to add
	 */
	public void setGlobalSize(long size) {
		if (size >= 0L) {
			this.globalSize += size;
		}
	}

	/**
	 * Gets the global size
	 * 
	 * @return the sum of all registered non-negative sizes
	 */
	public long getGlobalSize() {
		return this.globalSize;
	}

	/**
	 * Gets minimumSize
	 * 
	 * @return the minimum size as a decimal string, or "0" when no size has
	 *         been registered yet
	 */
	public String getMinimumSize() {
		return Long.toString((this.minimumSize == Long.MAX_VALUE) ? 0L
				: this.minimumSize);
	}

	/**
	 * Sets firstDateTime to the given value when none is set yet or when the
	 * given value is earlier than the current one.
	 * 
	 * @param dateTime
	 *            the candidate first date
	 */
	public void setFirstDateTime(long dateTime) {
		if ((this.firstDateTime == -1L) || (this.firstDateTime > dateTime)) {
			this.firstDateTime = dateTime;
		}
	}

	/**
	 * Gets firstDateTime
	 * 
	 * @return the formatted first date, or an empty string when no date has
	 *         been registered or formatting fails
	 */
	public String getFirstDateTime() {
		return formatTimestamp(this.firstDateTime);
	}

	/**
	 * Sets lastDateTime to the given value when it is later than the current
	 * one.
	 * 
	 * @param dateTime
	 *            the candidate last date
	 */
	public void setLastDateTime(long dateTime) {
		if (this.lastDateTime < dateTime) {
			this.lastDateTime = dateTime;
		}
	}

	/**
	 * Gets lastDateTime
	 * 
	 * @return the formatted last date, or an empty string when no date has
	 *         been registered or formatting fails
	 */
	public String getLastDateTime() {
		return formatTimestamp(this.lastDateTime);
	}

	/**
	 * Formats a stored date value through ContainerMDUtils.
	 * 
	 * @param timestamp
	 *            the stored date value, -1 meaning "not set"
	 * @return the formatted date, or an empty string when the value is not
	 *         set or a ParseException is raised
	 */
	private static String formatTimestamp(long timestamp) {
		if (timestamp == -1L) {
			return "";
		}
		try {
			return ContainerMDUtils.formatDateTime(ContainerMDUtils
					.longToDate(timestamp));
		} catch (ParseException e) {
			// Best effort: an unformattable date is reported as empty.
			return "";
		}
	}

	/**
	 * Appends the block format elements as XML, using the default element
	 * name and no attribute-name-to-value mapping.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 */
	public void getBlockFormats(StringBuilder sb) {
		getBlockFormats(sb, null, false);
	}

	/**
	 * Appends the block format elements as XML, wrapped in an element named
	 * {@code name + "s"}. Nothing is written when there are no block formats.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 * @param name
	 *            the element name, or null for "blockFormat"
	 * @param bAttrNameToValue
	 *            forwarded unchanged to {@code ContainerMDElement.toXml}
	 */
	public void getBlockFormats(StringBuilder sb, String name, boolean bAttrNameToValue) {
		if (this.blockFormats.isEmpty())
			return;
		if (name == null) name = "blockFormat";
		ContainerMDUtils.startElement(sb, name + "s");
		toXml(sb, this.blockFormats.values(), name, bAttrNameToValue);
		ContainerMDUtils.endElement(sb, name + "s");
	}

	/**
	 * Appends the payload format elements as XML, using the default element
	 * name and no attribute-name-to-value mapping.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 */
	public void getPayloadFormats(StringBuilder sb) {
		getPayloadFormats(sb, null, false);
	}

	/**
	 * Appends the payload format elements as XML, wrapped in an element named
	 * {@code name + "s"}. Nothing is written when there are no payload
	 * formats.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 * @param name
	 *            the element name, or null for "payloadFormat"
	 * @param bAttrNameToValue
	 *            forwarded unchanged to {@code ContainerMDElement.toXml}
	 */
	public void getPayloadFormats(StringBuilder sb, String name, boolean bAttrNameToValue) {
		if (this.payloadFormats.isEmpty())
			return;
		if (name == null) name = "payloadFormat";
		ContainerMDUtils.startElement(sb, name + "s");
		toXml(sb, this.payloadFormats.values(), name, bAttrNameToValue);
		ContainerMDUtils.endElement(sb, name + "s");
	}

	/**
	 * Appends the encoding elements as XML inside an "encodings" element.
	 * Nothing is written when there are no encodings.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 */
	public void getEncodings(StringBuilder sb) {
		if (this.encodings.isEmpty())
			return;
		ContainerMDUtils.startElement(sb, "encodings");
		toXml(sb, this.encodings.values());
		ContainerMDUtils.endElement(sb, "encodings");
	}

	/**
	 * Appends the declared mime type elements as XML using the default
	 * element name.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 */
	public void getDeclaredMimeTypes(StringBuilder sb) {
		getDeclaredMimeTypes(sb, null);
	}

	/**
	 * Appends the declared mime type elements as XML, wrapped in an element
	 * named {@code name + "s"}. Nothing is written when there are no declared
	 * mime types.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 * @param name
	 *            the element name, or null for "declaredMimeType"
	 */
	public void getDeclaredMimeTypes(StringBuilder sb, String name)  {
		if (this.declaredMimeTypes.isEmpty())
			return;
		if (name == null) name = "declaredMimeType";
		ContainerMDUtils.startElement(sb, name + "s");
		toXml(sb, this.declaredMimeTypes.values(), name);
		ContainerMDUtils.endElement(sb, name + "s");
	}

	/**
	 * Appends the host elements as XML inside a "hosts" element. Nothing is
	 * written when there are no hosts.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 */
	public void getHosts(StringBuilder sb) {
		if (this.hosts.isEmpty())
			return;
		ContainerMDUtils.startElement(sb, "hosts");
		toXml(sb, this.hosts.values());
		ContainerMDUtils.endElement(sb, "hosts");
	}

	/**
	 * Appends the response elements as XML inside a "responses" element.
	 * Nothing is written when there are no responses.
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 */
	public void getResponses(StringBuilder sb) {
		if (this.responses.isEmpty())
			return;
		ContainerMDUtils.startElement(sb, "responses");
		toXml(sb, this.responses.values());
		ContainerMDUtils.endElement(sb, "responses");
	}

	/**
	 * Format in XML the content of a given aggregated element
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 * @param elts
	 *            the elements to format
	 */
	public void toXml(StringBuilder sb, Collection<ContainerMDElement> elts) {
		toXml(sb, elts, null, false);
	}

	/**
	 * Format in XML the content of a given aggregated element
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 * @param elts
	 *            the elements to format
	 * @param name
	 *            forwarded unchanged to {@code ContainerMDElement.toXml}
	 */
	public void toXml(StringBuilder sb, Collection<ContainerMDElement> elts, String name) {
		toXml(sb, elts, name, false);
	}

	/**
	 * Format in XML the content of a given aggregated element
	 * 
	 * @param sb
	 *            the StringBuilder where the XML is written
	 * @param elts
	 *            the elements to format, written in iteration order
	 * @param name
	 *            forwarded unchanged to {@code ContainerMDElement.toXml}
	 * @param bAttrNameToValue
	 *            forwarded unchanged to {@code ContainerMDElement.toXml}
	 */
	public void toXml(StringBuilder sb, Collection<ContainerMDElement> elts, String name, boolean bAttrNameToValue) {
		for (ContainerMDElement element : elts) {
			element.toXml(sb, name, bAttrNameToValue);
		}
	}

	/**
	 * Handles distinct encodings. An encoding method is recorded only the
	 * first time it is seen; its ORDER attribute is the 1-based rank at which
	 * it was first seen.
	 * 
	 * @param type
	 *            the encoding type
	 * @param method
	 *            the encoding method, used as the map key
	 */
	public void handleEncoding(String type, String method) {
		if (!encodings.containsKey(method)) {
			ContainerMDElement container = new ContainerMDElement("encoding");

			container.getAttributes().put(ContainerMDAttributeName.TYPE, type);
			container.getAttributes().put(ContainerMDAttributeName.METHOD,
					method);
			container.getAttributes().put(ContainerMDAttributeName.ORDER,
					Integer.valueOf(encodings.size() + 1));
			encodings.put(method, container);
		}
	}

	/**
	 * Handles declared block formats. Unset formats are ignored.
	 * 
	 * @param format
	 *            the block format, used as the map key
	 * @param size
	 *            the size added to the format's global size
	 */
	public void handleBlockFormat(String format, long size) {
		if (!ContainerMDUtils.isSet(format))
			return;

		ContainerMDElement container = blockFormats.get(format);
		if (container != null) {
			incrementCounters(container, size);
		} else {
			container = new ContainerMDElement("blockFormat");

			container.getAttributes()
					.put(ContainerMDAttributeName.NAME, format);
			container.getAttributes().put(ContainerMDAttributeName.NUMBER,
					new AtomicInteger(1));
			container.getAttributes().put(ContainerMDAttributeName.GLOBALSIZE,
					new AtomicLong(size));

			blockFormats.put(format, container);
		}
	}

	/**
	 * Handles identified formats. Unset formats are ignored.
	 * 
	 * @param format
	 *            the payload format, used as the map key
	 * @param size
	 *            the size added to the format's global size
	 */
	public void handlePayloadFormat(String format, long size) {
		if (!ContainerMDUtils.isSet(format))
			return;

		ContainerMDElement container = payloadFormats.get(format);
		if (container != null) {
			incrementCounters(container, size);
		} else {
			container = new ContainerMDElement("payloadFormat");

			container.getAttributes()
					.put(ContainerMDAttributeName.NAME, format);
			container.getAttributes().put(ContainerMDAttributeName.NUMBER,
					new AtomicInteger(1));
			container.getAttributes().put(ContainerMDAttributeName.GLOBALSIZE,
					new AtomicLong(size));

			payloadFormats.put(format, container);
		}
	}

	/**
	 * Handles distinct declared mimeTypes. Unset mime types are ignored.
	 * 
	 * @param mimeType
	 *            the declared mime type, used as the map key
	 * @param size
	 *            the size added to the mime type's global size
	 */
	public void handleDeclaredMimeType(String mimeType, long size) {
		if (!ContainerMDUtils.isSet(mimeType))
			return;

		ContainerMDElement container = declaredMimeTypes.get(mimeType);
		if (container != null) {
			incrementCounters(container, size);
		} else {
			container = new ContainerMDElement("declaredMimeType", mimeType);

			container.getAttributes().put(ContainerMDAttributeName.NUMBER,
					new AtomicInteger(1));
			container.getAttributes().put(ContainerMDAttributeName.GLOBALSIZE,
					new AtomicLong(size));
			declaredMimeTypes.put(mimeType, container);
		}
	}

	/**
	 * Handles distinct hosts
	 * 
	 * @param host
	 *            the host name, used as the map key
	 * @param size
	 *            the size added to the host's global size
	 */
	public void handleHost(String host, long size) {
		ContainerMDElement container = hosts.get(host);
		if (container != null) {
			incrementCounters(container, size);
		} else {
			container = new ContainerMDElement("host", host);
			container.getAttributes().put(ContainerMDAttributeName.NUMBER,
					new AtomicInteger(1));
			container.getAttributes().put(ContainerMDAttributeName.GLOBALSIZE,
					new AtomicLong(size));
			hosts.put(host, container);
		}
	}

	/**
	 * Handles distinct response. Responses are keyed by
	 * "protocolName|protocolVersion|codeResponse".
	 * 
	 * @param protocolVersion
	 *            the protocol version
	 * @param protocolName
	 *            the protocol name
	 * @param codeResponse
	 *            the response code
	 * @param size
	 *            the size added to the response's global size
	 */
	public void handleResponse(String protocolVersion, String protocolName,
			String codeResponse, long size) {
		String key = protocolName + '|' + protocolVersion + '|' + codeResponse;
		ContainerMDElement container = responses.get(key);
		if (container != null) {
			incrementCounters(container, size);
		} else {
			container = new ContainerMDElement("response", codeResponse);
			// Attribute insertion order is kept as it always was, in case
			// the attribute map preserves it.
			container.getAttributes().put(ContainerMDAttributeName.NUMBER,
					new AtomicInteger(1));
			container.getAttributes().put(
					ContainerMDAttributeName.PROTOCOL_NAME, protocolName);
			container.getAttributes().put(
					ContainerMDAttributeName.PROTOCOL_VERSION, protocolVersion);
			container.getAttributes().put(ContainerMDAttributeName.GLOBALSIZE,
					new AtomicLong(size));
			responses.put(key, container);
		}
	}

	/**
	 * Bumps the NUMBER counter of an existing element by one and adds the
	 * given size to its GLOBALSIZE counter.
	 * 
	 * @param container
	 *            an element whose NUMBER and GLOBALSIZE attributes were set
	 *            by one of the handle methods of this class
	 * @param size
	 *            the size to add
	 */
	private static void incrementCounters(ContainerMDElement container, long size) {
		// Wildcard view: the values are only read and cast, as before.
		Map<?, ?> attrs = container.getAttributes();
		((AtomicInteger) attrs.get(ContainerMDAttributeName.NUMBER))
				.incrementAndGet();
		((AtomicLong) attrs.get(ContainerMDAttributeName.GLOBALSIZE))
				.addAndGet(size);
	}

	/**
	 * Returns a debug representation listing the size and date statistics
	 * followed by every aggregate map.
	 */
	@Override
	public String toString() {
		StringBuilder buf = new StringBuilder(512);
		buf.append("ContainerMDWrapper 0x")
				.append(Integer.toHexString(System.identityHashCode(this)))
				.append(" { ");
		buf.append("minimumSize=").append(this.minimumSize).append(", ");
		buf.append("maximumSize=").append(this.maximumSize).append(", ");
		buf.append("globalSize=").append(this.globalSize).append(", ");
		buf.append("firstDateTime=").append(this.firstDateTime).append(", ");
		buf.append("lastDateTime=").append(this.lastDateTime).append(", ");
		buf.append("encodings=").append(this.encodings).append(", ");
		buf.append("blockFormats=").append(this.blockFormats).append(", ");
		buf.append("declaredMimeTypes=").append(this.declaredMimeTypes).append(", ");
		buf.append("payloadFormats=").append(this.payloadFormats).append(", ");
		buf.append("hosts=").append(this.hosts).append(", ");
		buf.append("responses=").append(this.responses);
		return buf.append(" }").toString();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy