All downloads are free. Search and download functionality uses the official Maven repository.

org.wikidata.query.rdf.tool.change.Change Maven / Gradle / Ivy

Go to download

Tools to sync Wikibase to RDF stores. Also contains overall integration tests that rely on everything else.

The newest version!
package org.wikidata.query.rdf.tool.change;

import static com.google.common.base.Preconditions.checkNotNull;
import static org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.OUTPUT_DATE_FORMATTER;

import java.io.Closeable;
import java.time.Duration;
import java.time.Instant;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;

import org.openrdf.model.Statement;
import org.wikidata.query.rdf.tool.exception.RetryableException;

import com.google.common.collect.ImmutableList;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * A change in an entity in Wikibase.
 *
 * <p>The entity id, revision, timestamp, offset and chronology id are fixed at construction. The
 * statements and the two cleanup lists are mutable and can be replaced through their setters.
 *
 * <p>Natural ordering is by {@link #offset()} only. It is not consistent with {@code equals}, which is
 * inherited from {@link Object}.
 */
@SuppressFBWarnings("FCCD_FIND_CLASS_CIRCULAR_DEPENDENCY")
public class Change implements Comparable<Change> {
    /**
     * Change that is not associated with specific revision.
     */
    public static final long NO_REVISION = -1L;
    /**
     * Namespace prefixes that are stripped from the front of entity ids.
     */
    // FIXME: this should not be hardcoded
    private static final String[] ENTITY_ID_PREFIXES = {"Property:", "Item:", "Lexeme:"};
    /**
     * Entity that changed.
     */
    private final String entityId;
    /**
     * Revision that the change changed to.
     */
    private final long revision;
    /**
     * Timestamp of the change.
     */
    private final Instant timestamp;
    /**
     * Chronology Id.
     */
    private final String chronologyId;

    /**
     * Set of processed statements for the change.
     */
    private Collection<Statement> statements;

    /**
     * Reference cleanup list for the change.
     */
    // NOTE(review): element type String restored from context - confirm against callers.
    private Collection<String> refCleanupList = Collections.emptyList();

    /**
     * Value cleanup list for the change.
     */
    // NOTE(review): element type String restored from context - confirm against callers.
    private Collection<String> valueCleanupList = Collections.emptyList();

    /**
     * Offset of the change in the external stream.
     */
    private final long offset;

    /**
     * Build a change without a chronology id.
     *
     * @param entityId id of the entity that changed, optionally carrying a namespace prefix
     * @param revision revision the entity changed to
     * @param timestamp timestamp of the change, may be null
     * @param offset offset of the change in the external stream
     */
    public Change(String entityId, long revision, Instant timestamp, long offset) {
        this(entityId, revision, timestamp, offset, null);
    }

    /**
     * Build a change.
     *
     * @param entityId id of the entity that changed, optionally carrying a namespace prefix
     *        ({@code Property:}, {@code Item:} or {@code Lexeme:}) which is stripped
     * @param revision revision the entity changed to
     * @param timestamp timestamp of the change, may be null
     * @param offset offset of the change in the external stream
     * @param chronologyId chronology id of the change, may be null
     * @throws NullPointerException if entityId is null
     */
    public Change(String entityId, long revision, Instant timestamp, long offset, String chronologyId) {
        this.entityId = cleanEntityId(entityId);
        this.revision = revision;
        this.timestamp = timestamp;
        this.offset = offset;
        this.chronologyId = chronologyId;
    }

    /**
     * Strip a known namespace prefix from an entity id.
     *
     * @param entityIdWithPrefix entity id, with or without a prefix
     * @return the id without its prefix, or the id unchanged if it has no known prefix
     */
    private static String cleanEntityId(String entityIdWithPrefix) {
        for (String prefix : ENTITY_ID_PREFIXES) {
            if (entityIdWithPrefix.startsWith(prefix)) {
                return entityIdWithPrefix.substring(prefix.length());
            }
        }
        return entityIdWithPrefix;
    }

    /**
     * The entity that changed.
     */
    public String entityId() {
        return entityId;
    }

    /**
     * The revision of the change.
     *
     * @return the revision number or -1 if that information is not available
     */
    public long revision() {
        return revision;
    }

    /**
     * The offset of the change in the external stream.
     * Note that not all changes may come from an external stream,
     * and not all changes may come from the same stream.
     */
    public long offset() {
        return offset;
    }

    /**
     * The timestamp of the change.
     *
     * @return the timestamp or null if that information is not available
     */
    public Instant timestamp() {
        return timestamp;
    }

    /**
     * The chronology id of the change.
     *
     * @return the chronology id or null if none was given at construction
     */
    public String chronologyId() {
        return chronologyId;
    }

    /**
     * Render as {@code entityId[@revision][@timestamp|offset]}.
     * The revision is only included when it is non negative, the timestamp
     * and offset only when the timestamp is known.
     */
    @Override
    public String toString() {
        StringBuilder b = new StringBuilder();
        b.append(entityId);
        if (revision >= 0) {
            b.append('@').append(revision);
        }
        if (timestamp != null) {
            b.append('@').append(OUTPUT_DATE_FORMATTER.format(timestamp));
            b.append('|').append(offset);
        }
        return b.toString();
    }

    /**
     * Detects changes. Implementations should store all state in subclasses of
     * Change.Batch.
     *
     * @param <B> type of batch produced by this source
     */
    public interface Source<B extends Change.Batch> extends Closeable {
        /**
         * Fetch the first batch.
         *
         * @throws RetryableException if the fetch fails in a retryable way
         */
        B firstBatch() throws RetryableException;

        /**
         * Fetches the next batch after lastBatch.
         *
         * @throws RetryableException if the fetch fails in a retryable way
         */
        B nextBatch(B lastBatch) throws RetryableException;
    }

    /**
     * A batch of changes. Implementations should be immutable.
     */
    public interface Batch {
        /**
         * The changes in the batch.
         *
         * @return a list of changes. If the batch is empty then the list is
         *         empty. It is never null.
         */
        List<Change> changes();

        /**
         * Whether this batch had any changes, even invisible ones.
         * @return had changes?
         */
        boolean hasAnyChanges();

        /**
         * The unit of advanced() in English. Used for logging.
         */
        String advancedUnits();

        /**
         * How much this batch is "worth" in units of advancedUnits(). Used for
         * logging.
         */
        long advanced();

        /**
         * Human readable version of where this batch ends. Used for logging. It
         * should be obvious how to continue from here if possible.
         */
        String leftOffHuman();

        /**
         * Null or the latest date in the batch. If this is returned then the
         * updater process will attempt to mark the last update date in the rdf
         * store so it can pick up where it left off.
         */
        Instant leftOffDate();

        /**
         * Was this the last batch?
         */
        boolean last();

        /**
         * Simple default implementation of Batch.
         */
        abstract class AbstractDefaultImplementation implements Batch {
            /**
             * Changes in this batch.
             */
            private final ImmutableList<Change> changes;
            /**
             * How far did this batch advance?
             */
            private final long advanced;
            /**
             * Where did this batch leave off?
             */
            private final Object leftOff;

            /**
             * Build the batch.
             *
             * @param changes changes in this batch, must not be null
             * @param advanced how far this batch advanced
             * @param leftOff where this batch left off, rendered with toString() for logging
             */
            public AbstractDefaultImplementation(ImmutableList<Change> changes, long advanced, Object leftOff) {
                this.changes = checkNotNull(changes);
                this.advanced = advanced;
                this.leftOff = leftOff;
            }

            @Override
            public List<Change> changes() {
                return changes;
            }

            @Override
            public boolean hasAnyChanges() {
                return !changes.isEmpty();
            }

            @Override
            public long advanced() {
                return advanced;
            }

            @Override
            public String leftOffHuman() {
                // The constructor does not reject a null leftOff, so do not fail while logging.
                return String.valueOf(leftOff);
            }

            @Override
            public boolean last() {
                // By default we assume we're never done....
                return false;
            }
        }
    }

    /**
     * Order changes by their offset in the external stream.
     */
    @Override
    @SuppressFBWarnings(value = "EQ_COMPARETO_USE_OBJECT_EQUALS", justification = "This looks suspicious, but would need more investigation")
    // FIXME - since compareTo() is implemented, it would make sense to also implement equals() and hashCode(). But that
    // might lead to issues if the current code relies on Object.equals() in some places. It is probably simpler and
    // safer to move to an external comparator and might better represent the fact that comparing changes by offset is
    // only one of the way to compare them (e.g. natural ordering of Changes might also be by timestamp).
    public int compareTo(Change o) {
        // Subtracting and narrowing to int gives the wrong sign once the difference exceeds the int range.
        return Long.compare(offset(), o.offset());
    }

    /**
     * Return statements collection.
     */
    public Collection<Statement> getStatements() {
        return statements;
    }

    /**
     * Set statements collection.
     */
    public void setStatements(Collection<Statement> statements) {
        this.statements = statements;
    }

    /**
     * Return value cleanup list.
     */
    public Collection<String> getValueCleanupList() {
        return valueCleanupList;
    }

    /**
     * Return reference cleanup list.
     */
    public Collection<String> getRefCleanupList() {
        return refCleanupList;
    }

    /**
     * Set reference cleanup list.
     */
    public void setRefCleanupList(Collection<String> cleanupList) {
        this.refCleanupList = cleanupList;
    }

    /**
     * Set value cleanup list.
     */
    public void setValueCleanupList(Collection<String> cleanupList) {
        this.valueCleanupList = cleanupList;
    }

    /**
     * Wrapper that makes the enclosing change usable in a delay queue.
     * Two delayed changes are equal when they wrap the very same Change instance.
     */
    public class DelayedChange implements Delayed {
        /**
         * Instant at which the delay for this change is over.
         */
        private final Instant expires;

        /**
         * Build a delayed change expiring {@code delay} seconds from now.
         */
        DelayedChange(long delay) {
            this.expires = Instant.now().plusSeconds(delay);
        }

        /**
         * Remaining delay, negative once the expiry instant has passed.
         */
        @Override
        public long getDelay(TimeUnit unit) {
            return unit.convert(Duration.between(Instant.now(), expires).toNanos(), TimeUnit.NANOSECONDS);
        }

        /**
         * Order by expiry instant. Other Delayed implementations are ordered by
         * their remaining delay instead of failing on a cast.
         */
        @Override
        public int compareTo(Delayed o) {
            if (o instanceof DelayedChange) {
                return expires.compareTo(((DelayedChange) o).expires);
            }
            return Long.compare(getDelay(TimeUnit.NANOSECONDS), o.getDelay(TimeUnit.NANOSECONDS));
        }

        @Override
        public boolean equals(Object o) {
            // Identity comparison on purpose: Change does not override equals().
            return (o instanceof DelayedChange) && getChange() == ((DelayedChange) o).getChange();
        }

        @Override
        public int hashCode() {
            return getChange().hashCode();
        }

        /**
         * The change being delayed.
         */
        public Change getChange() {
            return Change.this;
        }
    }

    /**
     * Add this change to the queue, wrapped so that it expires after the timeout.
     *
     * @param queue queue to add the delayed change to
     * @param timeout delay in seconds
     */
    public void delay(Queue<DelayedChange> queue, long timeout) {
        queue.add(new DelayedChange(timeout));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy