All downloads are free. The search and download functionality uses the official Maven repository.

org.archive.extract.CDXExtractorOutput Maven / Gradle / Ivy

The newest version!
package org.archive.extract;

import java.io.IOException;
import java.io.PrintStream;
import java.util.List;

import org.archive.format.json.JSONView;
import org.archive.resource.Resource;
import org.archive.util.StreamCopy;

/**
 * {@link ExtractorOutput} that writes one space-delimited line per row
 * produced by a {@link JSONView} over a resource's top-level metadata.
 *
 * <p>Columns are emitted in the order of {@link #SPECS}. A {@code null} or
 * empty field is written as {@code '-'} so that every line keeps the same
 * number of columns.
 */
public class CDXExtractorOutput implements ExtractorOutput {
	// Output column order: CANON DATE URL MIME HTTP-CODE SHA1 REDIR OFFSET FILE
	private static final String URL_SPEC = "Envelope.ARC-Header-Metadata.Target-URI|Envelope.WARC-Header-Metadata.Target-URI";
	private static final String DATE_SPEC = "Envelope.ARC-Header-Metadata.Date";
	private static final String MIME_SPEC = "Envelope.ARC-Header-Metadata.Content-Type";
	private static final String HTTP_CODE_SPEC = "Envelope.Payload-Metadata.HTTP-Response-Metadata.Response-Message.Status";
	private static final String SHA1_SPEC = "Envelope.Payload-Metadata.HTTP-Response-Metadata.Entity-Digest";
	private static final String REDIR_SPEC = "Envelope.Payload-Metadata.HTTP-Response-Metadata.Headers.Content-Location";
	private static final String OFFSET_SPEC = "Container.Offset";
	private static final String FILENAME_SPEC = "Container.Filename";
	// The CANON column (index 0) is filled from the same spec as the URL
	// column; no canonicalization is applied in this class.
	private static final String[] SPECS = {
		URL_SPEC, DATE_SPEC, URL_SPEC, MIME_SPEC, HTTP_CODE_SPEC, SHA1_SPEC,
		REDIR_SPEC, OFFSET_SPEC, FILENAME_SPEC
	};
	/** Position of SHA1_SPEC within SPECS. */
	private static final int SHA1_FIELD_INDEX = 5;
	/**
	 * Number of leading characters dropped from the digest value.
	 * NOTE(review): presumably an algorithm prefix such as "sha1:" - confirm
	 * against the metadata producer.
	 */
	private static final int DIGEST_PREFIX_LENGTH = 5;
	private static final char EMPTY = '-';
	private static final char DELIM = ' ';
	JSONView view;
	private final PrintStream out;

	/**
	 * Creates an output that prints its lines to the given stream.
	 *
	 * @param out destination for the formatted lines
	 */
	public CDXExtractorOutput(PrintStream out) {
		view = new JSONView(SPECS);
		this.out = out;
	}

	/**
	 * Reads the resource's input stream to EOF, applies the view to the
	 * resource's top-level metadata and prints one line per resulting row.
	 *
	 * <p>The digest column has its first {@value #DIGEST_PREFIX_LENGTH}
	 * characters removed. If nothing remains after that (the value is too
	 * short to hold a prefix plus a digest) the column is written as
	 * {@code '-'} instead of failing or producing a blank column.
	 *
	 * @param resource the resource to describe
	 * @throws IOException if reading the resource's stream fails
	 */
	public void output(Resource resource) throws IOException {
		// The stream is consumed fully before the metadata is queried.
		// NOTE(review): presumably some metadata is only complete after the
		// whole payload has been read - confirm.
		StreamCopy.readToEOF(resource.getInputStream());
		List<List<String>> res = view.apply(resource.getMetaData().getTopMetaData());
		StringBuilder sb = new StringBuilder();
		for (List<String> actual : res) {
			sb.setLength(0);
			for (int i = 0; i < actual.size(); i++) {
				if (i > 0) {
					sb.append(DELIM);
				}
				String f = actual.get(i);
				if ((f != null) && (i == SHA1_FIELD_INDEX)) {
					// Guard the prefix strip: an unconditional substring would
					// throw on short values and yield "" on exact-length ones.
					f = (f.length() > DIGEST_PREFIX_LENGTH)
							? f.substring(DIGEST_PREFIX_LENGTH)
							: null;
				}
				if ((f == null) || (f.length() == 0)) {
					sb.append(EMPTY);
				} else {
					sb.append(f);
				}
			}
			out.println(sb.toString());
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy