package org.wikidata.wdtk.dumpfiles;
/*
* #%L
* Wikidata Toolkit Dump File Handling
* %%
* Copyright (C) 2014 Wikidata Toolkit Developers
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileAlreadyExistsException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.DocumentDataFilter;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocumentProcessor;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocumentProcessorBroker;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocumentProcessorFilter;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Sites;
import org.wikidata.wdtk.dumpfiles.wmf.WmfDumpFileManager;
import org.wikidata.wdtk.util.DirectoryManager;
import org.wikidata.wdtk.util.DirectoryManagerFactory;
import org.wikidata.wdtk.util.WebResourceFetcher;
import org.wikidata.wdtk.util.WebResourceFetcherImpl;
/**
* A class for controlling the processing of dump files through a unified
* interface. The settings of the controller specify how dump files should be
* fetched and processed.
*
* The methods for registering listeners that will process the dump contents
* are
* {@link #registerMwRevisionProcessor(MwRevisionProcessor, String, boolean)}
* and
* {@link #registerEntityDocumentProcessor(EntityDocumentProcessor, String, boolean)}.
*
* For processing the content of wiki pages, there are two modes of operation:
* revision-based and entity-document-based. The former is used when processing
* dump files that contain revisions. These hold detailed information about each
* revision (revision number, author, time, etc.) that could be used by revision
* processors.
*
* The entity-document-based operation is used when processing simplified dumps
* that contain only the content of the current (entity) pages of a wiki. In
* this case, no additional information is available and only the entity
* document processors are called (since we have no revisions). Both modes use
* the same entity document processors. In revision-based runs, it is possible
* to restrict some entity document processors to certain content models only
* (e.g., to process only properties). In entity-document-based runs, this is
* ignored and all entity document processors get to see all the data.
*
* The methods for revision-based processing of selected dump files (finding
* out which ones are relevant and downloading them first if needed) are
* {@link #processAllRecentRevisionDumps()} and
* {@link #processMostRecentMainDump()}.
*
* To extract the most recent sites information, the method
* {@link #getSitesInformation()} can be used. To get information about the
* revision dump files that the main methods will process, one can use
* {@link #getWmfDumpFileManager()} to get access to the underlying dump file
* manager, which can be used to get access to dump file data.
*
* The controller will also catch exceptions that may occur when trying to
* download and read dump files. They will be turned into logged errors.
*
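* A minimal end-to-end sketch (the registered processor is a placeholder for
* any {@link EntityDocumentProcessor} implementation of the caller's
* choosing; "wikidatawiki" is one possible project name):
*
* <pre>
* {@code
* DumpProcessingController controller = new DumpProcessingController("wikidatawiki");
* // register a processor for all entity documents in current revisions
* controller.registerEntityDocumentProcessor(myEntityDocumentProcessor, null, true);
* // fetch (if needed) and process the most recent JSON dump
* controller.processMostRecentJsonDump();
* }
* </pre>
*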
* @author Markus Kroetzsch
*
*/
public class DumpProcessingController {
static final Logger logger = LoggerFactory
.getLogger(DumpProcessingController.class);
/**
* Helper value class to store the registration settings of one listener.
*
* @author Markus Kroetzsch
*
*/
class ListenerRegistration {
final String model;
final boolean onlyCurrentRevisions;
ListenerRegistration(String model, boolean onlyCurrentRevisions) {
this.model = model;
this.onlyCurrentRevisions = onlyCurrentRevisions;
}
@Override
public int hashCode() {
if (this.model == null) {
return (this.onlyCurrentRevisions ? 1 : 0);
} else {
return 2 * this.model.hashCode()
+ (this.onlyCurrentRevisions ? 1 : 0);
}
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (!(obj instanceof ListenerRegistration)) {
return false;
}
ListenerRegistration other = (ListenerRegistration) obj;
if (this.model == null) {
return other.model == null
&& this.onlyCurrentRevisions == other.onlyCurrentRevisions;
} else {
return this.model.equals(other.model)
&& this.onlyCurrentRevisions == other.onlyCurrentRevisions;
}
}
}
/**
* Map of all {@link EntityDocumentProcessor} objects registered so far,
* based on the model and revision (current or not) they are registered for.
*/
final HashMap<ListenerRegistration, List<EntityDocumentProcessor>> entityDocumentProcessors;
/**
* Map of all {@link MwRevisionProcessor} objects registered so far, based on
* the model and revision (current or not) they are registered for.
*/
final HashMap<ListenerRegistration, List<MwRevisionProcessor>> mwRevisionProcessors;
/**
* The name of the project whose dumps are processed here.
*/
final String projectName;
/**
* Should only current dumps be considered? This is changed automatically if
* some registered listener is interested in non-current dumps.
*/
boolean preferCurrent = true;
/**
* The object used to access the Web or null if Web access is disabled. This
* is stored permanently here so that tests in this package can set the
* value to a mock object. This class should not need to be tested outside
* this package.
*/
WebResourceFetcher webResourceFetcher;
/**
* The object used to access the download directory where dump files are
* stored. This is stored permanently here so that tests in this package can
* set the value to a mock object. This class should not need to be tested
* outside this package.
*/
DirectoryManager downloadDirectoryManager;
final DocumentDataFilter filter = new DocumentDataFilter();
/**
* Creates a new DumpProcessingController for the project of the given
* name. By default, the dump file directory will be assumed to be in the
* current directory and the object will access the Web to fetch the most
* recent files.
*
* @param projectName
* Wikimedia project name, e.g., "wikidatawiki" or "enwiki"
*/
public DumpProcessingController(String projectName) {
this.projectName = projectName;
this.entityDocumentProcessors = new HashMap<>();
this.mwRevisionProcessors = new HashMap<>();
try {
setDownloadDirectory(System.getProperty("user.dir"));
} catch (IOException e) {
// The user.dir should always exist, so this is highly unusual.
throw new RuntimeException(e.toString(), e);
}
setOfflineMode(false);
}
/**
* Sets the directory where dumpfiles are stored locally. If it does not
* exist yet, this directory will be created. Dumpfiles will later be stored
* in a subdirectory "dumpfiles", but this will only be created when needed.
*
* @param downloadDirectory
* the download base directory
* @throws IOException
* if the existence of the directory could not be checked or if
* it did not exist and could not be created either
*/
public void setDownloadDirectory(String downloadDirectory)
throws IOException {
this.downloadDirectoryManager = DirectoryManagerFactory
.createDirectoryManager(downloadDirectory, false);
}
/**
* Disables or enables Web access.
*
* @param offlineModeEnabled
* if true, all Web access is disabled and only local files will
* be processed
*/
public void setOfflineMode(boolean offlineModeEnabled) {
if (offlineModeEnabled) {
this.webResourceFetcher = null;
} else {
this.webResourceFetcher = new WebResourceFetcherImpl();
}
}
/**
* Sets a property filter. If given, all data will be preprocessed to
* contain only statements for the given (main) properties.
*
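* A brief sketch, assuming the {@link Datamodel} helper for creating Wikidata
* property ids (P31 is an arbitrary example, and controller is an instance of
* this class):
*
* <pre>
* {@code
* controller.setPropertyFilter(Collections.singleton(
*         Datamodel.makeWikidataPropertyIdValue("P31")));
* }
* </pre>
*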
* @see DocumentDataFilter#setPropertyFilter(Set)
* @param propertyFilter
* set of properties that should be retained (can be empty)
*/
public void setPropertyFilter(Set<PropertyIdValue> propertyFilter) {
this.filter.setPropertyFilter(propertyFilter);
}
/**
* Sets a site link filter. If given, all data will be preprocessed to
* contain only data for the given site keys.
*
* @see DocumentDataFilter#setSiteLinkFilter(Set)
* @param siteLinkFilter
* set of siteLinks that should be retained (can be empty)
*/
public void setSiteLinkFilter(Set<String> siteLinkFilter) {
this.filter.setSiteLinkFilter(siteLinkFilter);
}
/**
* Sets a language filter. If given, all data will be preprocessed to
* contain only data for the given languages.
*
* @see DocumentDataFilter#setLanguageFilter(Set)
* @param languageFilter
* set of language codes that should be retained (can be empty)
*/
public void setLanguageFilter(Set<String> languageFilter) {
this.filter.setLanguageFilter(languageFilter);
}
/**
* Registers an MwRevisionProcessor, which will henceforth be notified of
* all revisions that are encountered in the dump.
*
* This is only used when processing dumps that contain revisions. In
* particular, plain JSON dumps contain no revision information.
*
* Importantly, the {@link MwRevision} that the registered processors will
* receive is valid only during the execution of
* {@link MwRevisionProcessor#processRevision(MwRevision)}, but it will not
* be permanent. If the data is to be retained permanently, the revision
* processor needs to make its own copy.
*
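* For example, to receive only the current revisions of pages in the
* Wikibase item content model (a sketch; MwRevision.MODEL_WIKIBASE_ITEM is
* assumed to be the constant for that model, and the processor is the
* caller's own implementation):
*
* <pre>
* {@code
* controller.registerMwRevisionProcessor(myRevisionProcessor,
*         MwRevision.MODEL_WIKIBASE_ITEM, true);
* }
* </pre>
*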
* @param mwRevisionProcessor
* the revision processor to register
* @param model
* the content model that the processor is registered for; it
* will only be notified of revisions in that model; if null is
* given, all revisions will be processed whatever their model
* @param onlyCurrentRevisions
* if true, then the subscriber is only notified of the most
* current revisions; if false, then it will receive all
* revisions, current or not
*/
public void registerMwRevisionProcessor(
MwRevisionProcessor mwRevisionProcessor, String model,
boolean onlyCurrentRevisions) {
registerProcessor(mwRevisionProcessor, model, onlyCurrentRevisions,
this.mwRevisionProcessors);
}
/**
* Registers an EntityDocumentProcessor, which will henceforth be notified
* of all entity documents that are encountered in the dump.
*
* It is possible to register processors for specific content types and to
* use either all revisions or only the most current ones. This
* functionality is only available when processing dumps that contain this
* information. In particular, plain JSON dumps do not specify content
* models at all and have only one (current) revision of each entity.
*
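* For example, to process only property documents when working with revision
* dumps (a sketch; MwRevision.MODEL_WIKIBASE_PROPERTY is assumed to be the
* constant for the property content model):
*
* <pre>
* {@code
* controller.registerEntityDocumentProcessor(myPropertyProcessor,
*         MwRevision.MODEL_WIKIBASE_PROPERTY, true);
* }
* </pre>
*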
* @param entityDocumentProcessor
* the entity document processor to register
* @param model
* the content model that the processor is registered for; it
* will only be notified of revisions in that model; if null is
* given, all revisions will be processed whatever their model
* @param onlyCurrentRevisions
* if true, then the subscriber is only notified of the most
* current revisions; if false, then it will receive all
* revisions, current or not
*/
public void registerEntityDocumentProcessor(
EntityDocumentProcessor entityDocumentProcessor, String model,
boolean onlyCurrentRevisions) {
registerProcessor(entityDocumentProcessor, model, onlyCurrentRevisions,
this.entityDocumentProcessors);
}
/**
* Processes the most recent dump of the sites table to extract information
* about registered sites.
*
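* For example (a sketch; the site key and page title are arbitrary, and
* {@link Sites#getPageUrl(String, String)} is assumed as the URL accessor):
*
* <pre>
* {@code
* Sites sites = controller.getSitesInformation();
* if (sites != null) {
*     String url = sites.getPageUrl("enwiki", "Douglas Adams");
* }
* }
* </pre>
*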
* @return a Sites object that contains the extracted information, or null
* if no sites dump was available (typically in offline mode without
* having any previously downloaded sites dumps)
* @throws IOException
* if there was a problem accessing the sites table dump or the
* dump download directory
*/
public Sites getSitesInformation() throws IOException {
MwDumpFile sitesTableDump = getMostRecentDump(DumpContentType.SITES);
if (sitesTableDump == null) {
return null;
}
// Create a suitable processor for such dumps and process the file:
MwSitesDumpFileProcessor sitesDumpFileProcessor = new MwSitesDumpFileProcessor();
sitesDumpFileProcessor.processDumpFileContents(
sitesTableDump.getDumpFileStream(), sitesTableDump);
return sitesDumpFileProcessor.getSites();
}
/**
* Processes all relevant page revision dumps in order. The registered
* listeners (MwRevisionProcessor or EntityDocumentProcessor objects) will
* be notified of all data they registered for.
*
* Note that this method may not always provide reliable results since
* single incremental dump files are sometimes missing, even if earlier and
* later incremental dumps are available. In such a case, processing all
* recent dumps will miss some (random) revisions, thus reflecting a state
* that the wiki has never really been in. It might thus be preferable to
* process only a single (main) dump file without any incremental dumps.
*
* @see DumpProcessingController#processMostRecentMainDump()
* @see DumpProcessingController#processDump(MwDumpFile)
* @see DumpProcessingController#getMostRecentDump(DumpContentType)
*/
public void processAllRecentRevisionDumps() {
WmfDumpFileManager wmfDumpFileManager = getWmfDumpFileManager();
if (wmfDumpFileManager == null) {
return;
}
MwDumpFileProcessor dumpFileProcessor = getRevisionDumpFileProcessor();
for (MwDumpFile dumpFile : wmfDumpFileManager
.findAllRelevantRevisionDumps(this.preferCurrent)) {
processDumpFile(dumpFile, dumpFileProcessor);
}
}
/**
* Processes the most recent incremental (daily) dump that is available.
* This is mainly useful for testing, since these dumps are much smaller
* than the main dumps. The registered listeners (MwRevisionProcessor or
* EntityDocumentProcessor objects) will be notified of all data they
* registered for.
*
* @see DumpProcessingController#processMostRecentMainDump()
* @see DumpProcessingController#processAllRecentRevisionDumps()
* @deprecated Use {@link #getMostRecentDump(DumpContentType)} with
* {@link DumpContentType#DAILY} and
* {@link #processDump(MwDumpFile)} instead; method will vanish
* in WDTK 0.5
*/
@Deprecated
public void processMostRecentDailyDump() {
processDump(getMostRecentDump(DumpContentType.DAILY));
}
/**
* Processes the most recent main (complete) dump that is available.
* Convenience method: same as retrieving a dump with
* {@link #getMostRecentDump(DumpContentType)} with
* {@link DumpContentType#CURRENT} or {@link DumpContentType#FULL}, and
* processing it with {@link #processDump(MwDumpFile)}. The individual
* methods should be used for better control and error handling.
*
* @see DumpProcessingController#processAllRecentRevisionDumps()
*/
public void processMostRecentMainDump() {
DumpContentType dumpContentType;
if (this.preferCurrent) {
dumpContentType = DumpContentType.CURRENT;
} else {
dumpContentType = DumpContentType.FULL;
}
processDump(getMostRecentDump(dumpContentType));
}
/**
* Processes the most recent main (complete) dump in JSON form that is
* available. Convenience method: same as retrieving a dump with
* {@link #getMostRecentDump(DumpContentType)} with
* {@link DumpContentType#JSON}, and processing it with
* {@link #processDump(MwDumpFile)}. The individual methods should be used
* for better control and error handling.
*
* @see DumpProcessingController#processAllRecentRevisionDumps()
*/
public void processMostRecentJsonDump() {
processDump(getMostRecentDump(DumpContentType.JSON));
}
/**
* Processes the contents of the given dump file. All registered processor
* objects will be notified of all data. Note that JSON dumps do not
* contain any revision information, so that registered
* {@link MwRevisionProcessor} objects will not be notified in this case.
* Dumps of type {@link DumpContentType#SITES} cannot be processed with this
* method; use {@link #getSitesInformation()} to process these dumps.
*
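* A typical combination with {@link #getMostRecentDump(DumpContentType)}
* (controller being an instance of this class):
*
* <pre>
* {@code
* MwDumpFile dumpFile = controller.getMostRecentDump(DumpContentType.JSON);
* controller.processDump(dumpFile); // does nothing if dumpFile is null
* }
* </pre>
*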
* @param dumpFile
* the dump to process
*/
public void processDump(MwDumpFile dumpFile) {
if (dumpFile == null) {
return;
}
MwDumpFileProcessor dumpFileProcessor;
switch (dumpFile.getDumpContentType()) {
case CURRENT:
case DAILY:
case FULL:
dumpFileProcessor = getRevisionDumpFileProcessor();
break;
case JSON:
dumpFileProcessor = getJsonDumpFileProcessor();
break;
case SITES:
default:
logger.error("Dumps of type " + dumpFile.getDumpContentType()
+ " cannot be processed as entity-document dumps.");
return;
}
processDumpFile(dumpFile, dumpFileProcessor);
}
/**
* Processes the most recent dump of the given type using the given dump
* processor.
*
* @see DumpProcessingController#processMostRecentMainDump()
* @see DumpProcessingController#processAllRecentRevisionDumps()
*
* @param dumpContentType
* the type of dump to process
* @param dumpFileProcessor
* the processor to use
* @deprecated Use {@link #getMostRecentDump(DumpContentType)} and
* {@link #processDump(MwDumpFile)} instead; method will vanish
* in WDTK 0.5
*/
@Deprecated
public void processMostRecentDump(DumpContentType dumpContentType,
MwDumpFileProcessor dumpFileProcessor) {
MwDumpFile dumpFile = getMostRecentDump(dumpContentType);
if (dumpFile != null) {
processDumpFile(dumpFile, dumpFileProcessor);
}
}
/**
* Returns a handler for the most recent dump file of the given type that is
* available (under the current settings), or null if no dump file of this
* type could be retrieved.
*
* @param dumpContentType
* the type of the dump, e.g., {@link DumpContentType#JSON}
* @return the most recent dump, or null if none was found
*/
public MwDumpFile getMostRecentDump(DumpContentType dumpContentType) {
WmfDumpFileManager wmfDumpFileManager = getWmfDumpFileManager();
if (wmfDumpFileManager == null) {
return null;
} else {
MwDumpFile result = wmfDumpFileManager
.findMostRecentDump(dumpContentType);
if (result == null) {
logger.warn("Could not find any dump of type "
+ dumpContentType.toString() + ".");
}
return result;
}
}
/**
* Processes one dump file with the given dump file processor, handling
* exceptions appropriately.
*
* @param dumpFile
* the dump file to process
* @param dumpFileProcessor
* the dump file processor to use
*/
void processDumpFile(MwDumpFile dumpFile,
MwDumpFileProcessor dumpFileProcessor) {
try (InputStream inputStream = dumpFile.getDumpFileStream()) {
dumpFileProcessor.processDumpFileContents(inputStream, dumpFile);
} catch (FileAlreadyExistsException e) {
logger.error("Dump file "
+ dumpFile.toString()
+ " could not be processed since file "
+ e.getFile()
+ " already exists. Try deleting the file or dumpfile directory to attempt a new download.");
} catch (IOException e) {
logger.error("Dump file " + dumpFile.toString()
+ " could not be processed: " + e.toString());
}
}
/**
* Returns a WmfDumpFileManager based on the current settings. This object
* can be used to get direct access to dump files, e.g., to gather more
* information. Most basic operations can also be performed using the
* interface of the {@link DumpProcessingController} and this is often
* preferable.
*
* This dump file manager will not be updated if the settings change later.
*
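* For example, to list the relevant revision dumps without processing them
* (a sketch; {@link MwDumpFile#getDateStamp()} is assumed as the accessor
* for the dump date):
*
* <pre>
* {@code
* WmfDumpFileManager manager = controller.getWmfDumpFileManager();
* if (manager != null) {
*     for (MwDumpFile dumpFile : manager.findAllRelevantRevisionDumps(true)) {
*         System.out.println(dumpFile.getDateStamp());
*     }
* }
* }
* </pre>
*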
* @return a WmfDumpFileManager for the current settings or null if there
* was a problem (e.g., since the current dump file directory could
* not be accessed)
*/
public WmfDumpFileManager getWmfDumpFileManager() {
try {
return new WmfDumpFileManager(this.projectName,
this.downloadDirectoryManager, this.webResourceFetcher);
} catch (IOException e) {
logger.error("Could not create dump file manager: " + e.toString());
return null;
}
}
/**
* Returns the main dump file processor that should be used to process
* revisions.
*
* @return the main MwDumpFileProcessor for revisions
*/
MwDumpFileProcessor getRevisionDumpFileProcessor() {
return new MwRevisionDumpFileProcessor(getMasterMwRevisionProcessor());
}
/**
* Returns the main dump file processor that should be used to process the
* content of JSON dumps.
*
* @return the main MwDumpFileProcessor for JSON
*/
MwDumpFileProcessor getJsonDumpFileProcessor() {
return new JsonDumpFileProcessor(getMasterEntityDocumentProcessor(),
Datamodel.SITE_WIKIDATA);
}
/**
* Stores a registered processor object in a map of processors. Used
* internally to keep {@link EntityDocumentProcessor} and
* {@link MwRevisionProcessor} objects.
*
* @param processor
* the processor object to register
* @param model
* the content model that the processor is registered for; it
* will only be notified of revisions in that model; if null is
* given, all revisions will be processed whatever their model
* @param onlyCurrentRevisions
* if true, then the subscriber is only notified of the most
* current revisions; if false, then it will receive all
* revisions, current or not
* @param processors
* the map of lists of processors to store the processor in
*/
private <T> void registerProcessor(T processor, String model,
boolean onlyCurrentRevisions,
Map<ListenerRegistration, List<T>> processors) {
this.preferCurrent = this.preferCurrent && onlyCurrentRevisions;
ListenerRegistration listenerRegistration = new ListenerRegistration(
model, onlyCurrentRevisions);
if (!processors.containsKey(listenerRegistration)) {
processors.put(listenerRegistration, new ArrayList<>());
}
processors.get(listenerRegistration).add(processor);
}
/**
* Returns an {@link EntityDocumentProcessor} object that calls all
* registered processors and that takes filters into account if needed.
*
* @return the master processor
*/
private EntityDocumentProcessor getMasterEntityDocumentProcessor() {
EntityDocumentProcessor result = null;
EntityDocumentProcessorBroker broker = null;
for (Map.Entry<ListenerRegistration, List<EntityDocumentProcessor>> entry : this.entityDocumentProcessors
.entrySet()) {
for (EntityDocumentProcessor edp : entry.getValue()) {
if (result == null) {
result = edp;
} else {
if (broker == null) {
broker = new EntityDocumentProcessorBroker();
broker.registerEntityDocumentProcessor(result);
result = broker;
}
broker.registerEntityDocumentProcessor(edp);
}
}
}
return filterEntityDocumentProcessor(result);
}
/**
* Wraps the given processor into a {@link EntityDocumentProcessorFilter} if
* global filters are configured; otherwise just returns the processor
* unchanged.
*
* @param processor
* the processor to wrap
*/
private EntityDocumentProcessor filterEntityDocumentProcessor(
EntityDocumentProcessor processor) {
if (this.filter.getPropertyFilter() == null
&& this.filter.getSiteLinkFilter() == null
&& this.filter.getLanguageFilter() == null) {
return processor;
} else {
return new EntityDocumentProcessorFilter(
processor, this.filter);
}
}
/**
* Returns an {@link MwRevisionProcessor} object that calls all registered
* processors and that takes filters into account if needed.
*
* @return the master processor
*/
private MwRevisionProcessor getMasterMwRevisionProcessor() {
MwRevisionProcessorBroker result = new MwRevisionProcessorBroker();
for (Entry<ListenerRegistration, List<MwRevisionProcessor>> entry : this.mwRevisionProcessors
.entrySet()) {
for (MwRevisionProcessor mrp : entry.getValue()) {
result.registerMwRevisionProcessor(mrp, entry.getKey().model,
entry.getKey().onlyCurrentRevisions);
}
}
for (Map.Entry<ListenerRegistration, List<EntityDocumentProcessor>> edpEntry : this.entityDocumentProcessors
.entrySet()) {
EntityDocumentProcessor resultEdp;
if (edpEntry.getValue().size() == 1) {
resultEdp = edpEntry.getValue().get(0);
} else {
EntityDocumentProcessorBroker edpb = new EntityDocumentProcessorBroker();
for (EntityDocumentProcessor edp : edpEntry.getValue()) {
edpb.registerEntityDocumentProcessor(edp);
}
resultEdp = edpb;
}
result.registerMwRevisionProcessor(new WikibaseRevisionProcessor(
filterEntityDocumentProcessor(resultEdp),
Datamodel.SITE_WIKIDATA), edpEntry.getKey().model, edpEntry
.getKey().onlyCurrentRevisions);
}
return result;
}
}