All downloads are free. The search and download features use the official Maven repository.

org.openstreetmap.atlas.checks.event.CheckFlagGeoJsonProcessor Maven / Gradle / Ivy

package org.openstreetmap.atlas.checks.event;

import java.util.Date;
import java.util.Vector;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.openstreetmap.atlas.checks.constants.CommonConstants;
import org.openstreetmap.atlas.checks.distributed.GeoJsonPathFilter;
import org.openstreetmap.atlas.generator.tools.spark.utilities.SparkFileHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.eventbus.AllowConcurrentEvents;
import com.google.common.eventbus.Subscribe;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

/**
 * A {@link Processor} for {@link CheckFlagEvent}s to write them into GeoJson files
 *
 * @author brian_l_davis
 */
public final class CheckFlagGeoJsonProcessor implements Processor
{

    private static final int MAX_BATCH_SUM = 25000;
    private static final int MIN_BATCH_SIZE = 100;
    private static final int BUCKET_CAPACITY = 100;
    private static final int BUCKET_INCREMENT = 25;

    private static final Logger logger = LoggerFactory.getLogger(CheckFlagGeoJsonProcessor.class);

    // File helper to write files
    private final SparkFileHelper fileHelper;

    // Directory to write files in
    private final String directory;

    // Event Features bucketed by Challenge
    private final ConcurrentHashMap> featureBuckets = new ConcurrentHashMap<>();

    // Bucket locks used to synchronize filling and emptying buckets
    private final ConcurrentHashMap bucketLocks = new ConcurrentHashMap<>();

    // Whether or not to compress output file
    private boolean compressOutput = true;

    // Batch size override
    private int batchSizeOverride;

    // Detect has written
    private boolean hasWritten;

    /**
     * Default constructor
     *
     * @param fileHelper
     *            {@link SparkFileHelper} for I/O operations
     * @param outputFolder
     *            output folder path to write files to
     */
    public CheckFlagGeoJsonProcessor(final SparkFileHelper fileHelper, final String outputFolder)
    {
        this.fileHelper = fileHelper;
        this.directory = outputFolder;
        this.hasWritten = false;
    }

    @Override
    @Subscribe
    @AllowConcurrentEvents
    public void process(final CheckFlagEvent event)
    {
        final String challenge = event.getCheckFlag().getChallengeName()
                .orElse(event.getCheckName());

        final Vector featureBucket = this.featureBuckets.computeIfAbsent(challenge,
                key -> new Vector<>(BUCKET_CAPACITY, BUCKET_INCREMENT));
        final ReadWriteLock bucketLock = this.bucketLocks.computeIfAbsent(challenge,
                key -> new ReentrantReadWriteLock());

        bucketLock.readLock().lock();
        try
        {
            featureBucket.add(event.toGeoJsonFeature());
        }
        finally
        {
            bucketLock.readLock().unlock();
        }

        final int batchSize = computeBatchSize();
        if (featureBucket.size() >= batchSize)
        {
            bucketLock.writeLock().lock();
            try
            {
                if (featureBucket.size() >= batchSize)
                {
                    this.write(challenge, featureBucket);
                }
            }
            finally
            {
                bucketLock.writeLock().unlock();
            }
        }
    }

    @Override
    @Subscribe
    public void process(final ShutdownEvent event)
    {
        try
        {
            if (!this.featureBuckets.isEmpty())
            {
                this.featureBuckets.forEach(this::write);
            }
            else
            {
                this.write(CommonConstants.EMPTY_STRING, new Vector<>());
            }
        }
        catch (final Exception e)
        {
            logger.warn("CheckFlag geojson file write is failed.", e);
        }
    }

    /**
     * Overrides the computed batch size
     *
     * @param batchSizeOverride
     *            override batch size value
     * @return the {@link CheckFlagFileProcessor}
     */
    public CheckFlagGeoJsonProcessor withBatchSizeOverride(final int batchSizeOverride)
    {
        this.batchSizeOverride = batchSizeOverride;
        return this;
    }

    /**
     * Sets whether or not output files are compressed
     *
     * @param compress
     *            value to set
     * @return the {@link CheckFlagFileProcessor}
     */
    public CheckFlagGeoJsonProcessor withCompression(final boolean compress)
    {
        this.compressOutput = compress;
        return this;
    }

    /**
     * Returns bucket size based on the number of Checks we have bucketed so far
     *
     * @return batch size
     */
    protected int computeBatchSize()
    {
        if (this.batchSizeOverride > 0)
        {
            return this.batchSizeOverride;
        }
        if (this.featureBuckets.isEmpty())
        {
            return MAX_BATCH_SUM;
        }
        return Math.max(MAX_BATCH_SUM / this.featureBuckets.size(), MIN_BATCH_SIZE);
    }

    protected String getFilename(final String challenge, final int size)
    {
        return String.format("%s-%s-%s%s", challenge, new Date().getTime(), size,
                new GeoJsonPathFilter(this.compressOutput).getExtension());
    }

    /**
     * Writes a new file with the cached in String buffer
     */
    private void write(final String challenge, final Vector featureBucket)
    {
        if (featureBucket.size() > 0)
        {
            final JsonObject featureCollection = new JsonObject();
            featureCollection.addProperty("type", "FeatureCollection");
            final JsonArray featureJsonArray = new JsonArray();
            featureBucket.forEach(featureJsonArray::add);
            featureCollection.add("features", featureJsonArray);
            this.fileHelper.write(this.directory,
                    this.getFilename(challenge, featureJsonArray.size()),
                    featureCollection.toString());
            this.hasWritten = true;
            featureBucket.clear();
        }
        else if (!this.hasWritten)
        {
            logger.warn("Writing empty file with no content in {}.", this.directory);
            this.fileHelper.write(this.directory,
                    String.format("%s%s", "empty",
                            new GeoJsonPathFilter(this.compressOutput).getExtension()),
                    CommonConstants.EMPTY_STRING);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy