
org.jwat.tools.tasks.test.TestFile2


JWAT-Tools uses the JWAT libraries to expose high-level tasks either from the command line or programmatically. Common tasks include: Test, Compress, Decompress, CDX, Arc2Warc. More specialised tasks include: Changed, ContainerMD, Delete, Extract, Interval, PathIndex, Unpack, Headers2CDX.
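For programmatic use, the Test task comes down to driving the TestFile2 class listed below. The following is a minimal sketch using only the public fields and the processFile method visible in the listing; the wrapper class name TestOneFile and the file path are illustrative, the Cloner and update callback are left unset, and UriProfile.RFC3986 is assumed to be the desired URI profile constant from JWAT-Common.

import java.io.File;

import org.jwat.common.UriProfile;
import org.jwat.tools.tasks.test.TestFile2;
import org.jwat.tools.tasks.test.TestFileResult;

public class TestOneFile {
	public static void main(String[] args) {
		TestFile2 testFile = new TestFile2();
		testFile.bShowErrors = true;              // collect per-record diagnoses in result.rdList
		testFile.bValidateDigest = true;          // enable block/payload digest validation
		testFile.uriProfile = UriProfile.RFC3986; // assumed URI profile constant
		// NOTE: depending on the build, validatorPlugins may also need to be
		// populated before payloads can be validated.
		TestFileResult result = testFile.processFile(new File("example.warc.gz"), null);
		System.out.println(result.file
				+ ": WARC reader = " + result.bWarcReader
				+ ", compliant = " + result.bWarcIsCompliant
				+ ", errors = " + result.warcErrors
				+ ", warnings = " + result.warcWarnings);
	}
}

The source of TestFile2 follows.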

package org.jwat.tools.tasks.test;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.jwat.arc.ArcRecordBase;
import org.jwat.archive.ArchiveParser;
import org.jwat.archive.ArchiveParserCallback;
import org.jwat.archive.Cloner;
import org.jwat.archive.FileIdent;
import org.jwat.archive.ManagedPayload;
import org.jwat.common.ContentType;
import org.jwat.common.UriProfile;
import org.jwat.gzip.GzipEntry;
import org.jwat.tools.core.ManagedPayloadContentTypeIdentifier;
import org.jwat.tools.core.ValidatorPlugin;
import org.jwat.warc.WarcRecord;

public class TestFile2 implements ArchiveParserCallback {

	public boolean bShowErrors;

	public boolean bValidateDigest;

	public UriProfile uriProfile;

	public int recordHeaderMaxSize = 8192;

	public int payloadHeaderMaxSize = 32768;

	protected ManagedPayload managedPayload;

	protected ManagedPayloadContentTypeIdentifier managedPayloadContentTypeIdentifier;

	protected Cloner cloner;

	public List<ValidatorPlugin> validatorPlugins;

	public TestFileUpdateCallback callback;

	protected TestFileResult result;

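	// Parses the given file with an ArchiveParser, using this instance as the
	// parser callback, and collects reader compliance flags, per-record
	// diagnoses and counters into the returned TestFileResult.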
	public TestFileResult processFile(File file, Cloner cloner) {
		result = new TestFileResult();
		result.file = file.getPath();

		managedPayloadContentTypeIdentifier = ManagedPayloadContentTypeIdentifier.getManagedPayloadContentTypeIdentifier();

		this.cloner = cloner;

		ArchiveParser archiveParser = new ArchiveParser();
		archiveParser.uriProfile = uriProfile;
		archiveParser.bBlockDigestEnabled = bValidateDigest;
		archiveParser.bPayloadDigestEnabled = bValidateDigest;
		archiveParser.recordHeaderMaxSize = recordHeaderMaxSize;
		archiveParser.payloadHeaderMaxSize = payloadHeaderMaxSize;

		managedPayload = ManagedPayload.checkout();

		long consumed = archiveParser.parse(file, this);

		managedPayload.checkin();

		result.bGzipReader = archiveParser.gzipReader != null;
		result.bArcReader = archiveParser.arcReader != null;
		result.bWarcReader = archiveParser.warcReader != null;
		if (archiveParser.gzipReader != null) {
			result.bGzipIsComppliant = archiveParser.gzipReader.isCompliant();
		}
		if (archiveParser.arcReader != null) {
			result.bArcIsCompliant = archiveParser.arcReader.isCompliant();
		}
		if (archiveParser.warcReader != null) {
			result.bWarcIsCompliant = archiveParser.warcReader.isCompliant();
		}

		if (callback != null) {
			callback.finalUpdate(result, consumed);
		}
		return result;
	}

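	// Counts the input file by its identified format; unidentified files are counted as skipped.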
	@Override
	public void apcFileId(File file, int fileId) {
		switch (fileId) {
		case FileIdent.FILEID_GZIP:
			++result.gzFiles;
			break;
		case FileIdent.FILEID_ARC:
			++result.arcFiles;
			break;
		case FileIdent.FILEID_WARC:
			++result.warcFiles;
			break;
		case FileIdent.FILEID_ARC_GZ:
			++result.arcGzFiles;
			break;
		case FileIdent.FILEID_WARC_GZ:
			++result.warcGzFiles;
			break;
		case FileIdent.FILEID_UNKNOWN:
			++result.skipped;
			break;
		}
	}

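	// Tallies each GZip entry and records its diagnostics (reported individually when bShowErrors is set).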
	@Override
	public void apcGzipEntryStart(GzipEntry gzipEntry, long startOffset) {
		++result.gzipEntries;
		result.gzipErrors += gzipEntry.diagnostics.getErrors().size();
		result.gzipWarnings += gzipEntry.diagnostics.getWarnings().size();
		if ( bShowErrors ) {
			//TestResult.showGzipErrors(srcFile, gzipEntry, System.out);
			if (gzipEntry.diagnostics.hasErrors() || gzipEntry.diagnostics.hasWarnings()) {
				TestFileResultItemDiagnosis itemDiagnosis = new TestFileResultItemDiagnosis();
				itemDiagnosis.offset = startOffset;
				itemDiagnosis.errors = gzipEntry.diagnostics.getErrors();
				itemDiagnosis.warnings = gzipEntry.diagnostics.getWarnings();
				result.rdList.add(itemDiagnosis);
			}
		}
	}

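	// Validates a single ARC record: buffers it through the shared ManagedPayload,
	// collects its diagnostics, optionally clones records with errors or warnings,
	// and validates the payload.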
	@Override
	public void apcArcRecordStart(ArcRecordBase arcRecord, long startOffset, boolean compressed) throws IOException {
		++result.arcRecords;
		//System.out.println(arcRecords + " - " + arcRecord.getStartOffset() + " (0x" + (Long.toHexString(arcRecord.getStartOffset())) + ")");
		TestFileResultItemDiagnosis itemDiagnosis = new TestFileResultItemDiagnosis();
		itemDiagnosis.offset = startOffset;
		// TODO arc type string in JWAT.
		switch (arcRecord.recordType) {
		case ArcRecordBase.RT_VERSION_BLOCK:
			managedPayload.manageVersionBlock(arcRecord, false);
			break;
		case ArcRecordBase.RT_ARC_RECORD:
			managedPayload.manageArcRecord(arcRecord, false);
			break;
		default:
			throw new IllegalStateException();
		}
		arcRecord.close();
		if (arcRecord.diagnostics.hasErrors() || arcRecord.diagnostics.hasWarnings()) {
			itemDiagnosis.errors = arcRecord.diagnostics.getErrors();
			itemDiagnosis.warnings = arcRecord.diagnostics.getWarnings();
			if (cloner != null) {
				cloner.cloneArcRecord(arcRecord, managedPayload);
			}
		}
		if (arcRecord.hasPayload() && !arcRecord.hasPseudoEmptyPayload()) {
			validate_payload(arcRecord, arcRecord.header.contentType, itemDiagnosis);
		}
		if ( bShowErrors ) {
			//TestResult.showArcErrors( srcFile, arcRecord, System.out );
			if (itemDiagnosis.errors.size() > 0 || itemDiagnosis.warnings.size() > 0) {
				result.rdList.add(itemDiagnosis);
			}
		}
		result.arcErrors += itemDiagnosis.errors.size();
		result.arcWarnings += itemDiagnosis.warnings.size();
		for (int i=0; i<itemDiagnosis.throwables.size(); ++i) {
			// Reconstructed by analogy with the diagnosis handling above:
			// throwables recorded during validation are assumed to be
			// propagated to the aggregated result.
			result.throwableList.add(itemDiagnosis.throwables.get(i));
		}
	}

	@Override
	public void apcWarcRecordStart(WarcRecord warcRecord, long startOffset, boolean compressed) throws IOException {
		++result.warcRecords;
		TestFileResultItemDiagnosis itemDiagnosis = new TestFileResultItemDiagnosis();
		itemDiagnosis.offset = startOffset;
		// The middle of this handler was truncated in the listing; it mirrors
		// apcArcRecordStart above (manage the record through managedPayload,
		// close it, collect diagnostics, optionally clone, validate the payload).
		if ( bShowErrors ) {
			if (itemDiagnosis.errors.size() > 0 || itemDiagnosis.warnings.size() > 0) {
				result.rdList.add(itemDiagnosis);
			}
		}
		result.warcErrors += itemDiagnosis.errors.size();
		result.warcWarnings += itemDiagnosis.warnings.size();
		for (int i=0; i<itemDiagnosis.throwables.size(); ++i) {
			result.throwableList.add(itemDiagnosis.throwables.get(i));
		}
	}

	// validate_payload(...) and the remaining ArchiveParserCallback methods
	// were cut off in the original listing.
}



