All downloads are free. The search and download functionality uses the official Maven repository.

io.bitsensor.lib.privacy.PrivacyProcessor Maven / Gradle / Ivy

package io.bitsensor.lib.privacy;

import io.bitsensor.lib.entity.proto.Datapoint.Builder;
import io.bitsensor.lib.entity.proto.PersonalData;
import io.bitsensor.proto.shaded.com.google.common.base.Charsets;
import io.bitsensor.proto.shaded.com.google.common.base.Function;
import io.bitsensor.proto.shaded.com.google.common.base.Predicate;
import io.bitsensor.proto.shaded.com.google.common.base.Predicates;
import io.bitsensor.proto.shaded.com.google.common.collect.FluentIterable;
import io.bitsensor.proto.shaded.com.google.common.collect.Iterables;
import io.bitsensor.proto.shaded.com.google.common.collect.Sets;
import io.bitsensor.proto.shaded.com.google.common.hash.HashFunction;
import io.bitsensor.proto.shaded.com.google.common.hash.Hashing;
import io.dataapps.chlorine.finder.Finder;
import io.dataapps.chlorine.finder.FinderEngine;
import io.dataapps.chlorine.pattern.RegexFinder;

import java.util.*;

/**
 * Privacy processor for privacy data loss prevention.
 */
public class PrivacyProcessor {
    public static final String VALUES = "_values";
    public static final String TYPES = "_types";
    private static final boolean ADD_DEFAULT_FINDERS = false;

    private FinderEngine finderEngine;
    private String salt = "";

    private Map filter,
            keyFilter,
            valueFilter;

    public PrivacyProcessor(String salt, Map filter) {
        this(salt, filter, ADD_DEFAULT_FINDERS);
    }

    public PrivacyProcessor(String salt, Map filter, boolean addDefaultFinders) {
        if (salt != null && !salt.trim().isEmpty())
            this.salt = salt;

        if (filter == null) {
            finderEngine = new FinderEngine((List) null, addDefaultFinders);
            return;
        }

        this.filter = formatFilter(filter);
        this.keyFilter = createKeyFilter(this.filter);
        this.valueFilter = createValueFilter(this.filter);

        List finders = new ArrayList<>();
        for (Map.Entry valueFilter : getValueFilter().entrySet()) {
            finders.add(new RegexFinder(valueFilter.getKey(), valueFilter.getValue().getMatch()));
        }

        finderEngine = new FinderEngine(finders, addDefaultFinders);
    }

    /**
     * Returns filters that are Chlorine-Finder compatible.
     */
    private static Map formatFilter(Map filterMap) {
        Map formattedFilterMap = new HashMap<>();

        for (Map.Entry filterEntry : filterMap.entrySet()) {
            formattedFilterMap.put(
                    filterEntry.getKey()
                            .toLowerCase()
                            .replaceAll("\\.", "_")
                            .replaceAll("\\s", ""),
                    filterEntry.getValue());
        }

        return formattedFilterMap;
    }

    /**
     * Returns filters that are key filters.
     */
    private static Map createKeyFilter(Map filter) {
        Map keyFilter = new HashMap<>();
        for (Map.Entry f : filter.entrySet()) {
            if (f.getValue().getTarget() == null
                    || f.getValue().getMatch() == null
                    || !f.getValue().getTarget().equals("key")
                    || f.getValue().getMatch().trim().isEmpty()) {
                continue;
            }

            keyFilter.put(f.getKey(), f.getValue());
        }
        return keyFilter;
    }

    /**
     * Returns filters that are value filters.
     */
    private static Map createValueFilter(Map filter) {
        Map valueFilter = new HashMap<>();
        for (Map.Entry f : filter.entrySet()) {
            if (f.getValue().getTarget() == null
                    || f.getValue().getMatch() == null
                    || !f.getValue().getTarget().equals("value")
                    || f.getValue().getMatch().trim().isEmpty()) {
                continue;
            }

            valueFilter.put(f.getKey(), f.getValue());
        }
        return valueFilter;
    }

    public String getSalt() {
        return salt;
    }

    public Map getFilter() {
        if (filter == null)
            return Collections.emptyMap();

        return filter;
    }

    public Map getKeyFilter() {
        if (keyFilter == null)
            return Collections.emptyMap();

        return keyFilter;
    }

    public Map getValueFilter() {
        if (valueFilter == null)
            return Collections.emptyMap();

        return valueFilter;
    }

    /**
     * Returns a modified {@code Datapoint.Builder} object that contains extracted/redacted personal information if
     * found.
     *
     * @param builder a {@code Datapoint.Builder} input
     * @return a modified {@code Datapoint.Builder} or the original input.
     */
    public Builder process(final Builder builder) {
        if (builder == null || builder.getInputCount() == 0)
            return builder;

        processInputMap(builder, new BiConsumer() {
            @Override
            public void accept(String key, String value) {
                builder.putInput(key, value);
            }
        });
        processContextMap(builder, new BiConsumer() {
            @Override
            public void accept(String key, String value) {
                builder.putContext(key, value);
            }
        });
        processEndpointMap(builder, new BiConsumer() {
            @Override
            public void accept(String key, String value) {
                builder.putEndpoint(key, value);
            }
        });

        updateTypes(builder);
        updateValues(builder);

        return builder;
    }

    private void processContextMap(Builder builder, BiConsumer consumer) {
        for (String key : Arrays.asList(/*"username", */"http.referer")) {
            String value = builder.getContextOrDefault(key, "");

            filterValue(builder, consumer, key, value);
        }
    }

    private void processEndpointMap(Builder builder, BiConsumer consumer) {
        for (String key : Arrays.asList("uri", "query", "resource", "uriDecoded", "location")) {
            String value = builder.getEndpointOrDefault(key, "");

            filterValue(builder, consumer, key, value);
        }
    }

    private void processInputMap(Builder builder, BiConsumer consumer) {
        for (Map.Entry entry : builder.getInputMap().entrySet()) {
            if (entry.getValue().trim().isEmpty())
                return;

            // Target KEY
            if (!filterKey(builder, consumer, entry.getKey(), entry.getValue()))
                // If key is not filtered, check its value
                // Target VALUE
                filterValue(builder, consumer, entry.getKey(), entry.getValue());
        }
    }

    private boolean filterKey(Builder builder, BiConsumer consumer, String key, String value) {
        Map.Entry matchedKeyFilter = null;

        for (Map.Entry entry : getKeyFilter().entrySet()) {
            if (entry.getValue().getMatch().equals(key) || key.matches(entry.getValue().getMatch())) {
                matchedKeyFilter = entry;
                break;
            }
        }

        if (matchedKeyFilter == null) {
            return false;
        }

        PersonalData pd = builder.getPersonalDataOrDefault(matchedKeyFilter.getKey(), PersonalData.getDefaultInstance());
        if (matchedKeyFilter.getValue().isSanitize()) {
            value = createHash(value);
            consumer.accept(key, value);
        }

        if (!pd.getValueList().contains(value))
            builder.putPersonalData(matchedKeyFilter.getKey(), pd.toBuilder().addValue(value).build());

        return true;
    }

    private void filterValue(Builder builder, BiConsumer consumer, String key, String value) {
        Map> matchesByType = finderEngine.findWithType(value);
        if (matchesByType.isEmpty())
            return;

        for (Map.Entry> entry : matchesByType.entrySet()) {
            final String typeLowerCase = entry.getKey().toLowerCase().replaceAll("\\s", "");
            Set dataValues = Sets.newHashSet(entry.getValue());
            PersonalData pd = builder.getPersonalDataOrDefault(typeLowerCase, PersonalData.getDefaultInstance());

            PrivacyFilter matchedPrivacyFilter = Iterables.tryFind(getFilter().entrySet(),
                    new Predicate>() {
                        @Override
                        public boolean apply(Map.Entry input) {
                            return input.getKey().equals(typeLowerCase);
                        }
                    })
                    .transform(new Function, PrivacyFilter>() {
                        @Override
                        public PrivacyFilter apply(Map.Entry input) {
                            return input.getValue();
                        }
                    })
                    .orNull();

            // Sanitizing sensitive data value
            if (matchedPrivacyFilter != null && matchedPrivacyFilter.isSanitize()) {
                String sanitizedValue = value;

                Set newDataValues = new HashSet<>();
                for (String dataValue : dataValues) {
                    String sanitizedDataValue = typeLowerCase + "-" + createHash(dataValue);

                    sanitizedValue = sanitizedValue.replace(dataValue, sanitizedDataValue);

                    newDataValues.add(sanitizedDataValue);
                }

                dataValues = newDataValues;
                consumer.accept(key, sanitizedValue);
            }

            // Merge all data values from different filter results
            dataValues.addAll(pd.getValueList());

            builder.putPersonalData(typeLowerCase, pd.toBuilder().clearValue().addAllValue(dataValues).build());
        }
    }

    private void updateTypes(Builder builder) {
        PersonalData pdTypes = builder.getPersonalDataOrDefault(TYPES, PersonalData.getDefaultInstance());

        Set typeSet = Sets.filter(builder.getPersonalDataMap().keySet(),
                Predicates.and(
                        Predicates.not(Predicates.equalTo(TYPES)),
                        Predicates.not(Predicates.equalTo(VALUES))
                ));

        if (typeSet.isEmpty())
            return;

        builder.putPersonalData(TYPES, pdTypes.toBuilder().clearValue().addAllValue(typeSet).build());
    }

    private void updateValues(Builder builder) {
        PersonalData pdValues = builder.getPersonalDataOrDefault(VALUES, PersonalData.getDefaultInstance());

        // Collect all personalData values
        Set valueSet = FluentIterable.from(builder.getPersonalDataMap().entrySet())
                .filter(new Predicate>() {
                    @Override
                    public boolean apply(Map.Entry input) {
                        return !input.getKey().equals(TYPES) && !input.getKey().equals(VALUES);
                    }
                })
                .transformAndConcat(new Function, Iterable>() {
                    @Override
                    public Iterable apply(Map.Entry input) {
                        return input.getValue().getValueList();
                    }
                }).toSet();

        if (valueSet.isEmpty())
            return;

        builder.putPersonalData(VALUES, pdValues.toBuilder().clearValue().addAllValue(valueSet).build());
    }

    String createHash(String data) {
        HashFunction hf = Hashing.murmur3_128(64);
        return hf.hashString(salt + data, Charsets.UTF_8).toString();
    }

    public interface BiConsumer {

        /**
         * Performs this operation on the given arguments.
         *
         * @param t the first input argument
         * @param u the second input argument
         */
        void accept(T t, U u);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy