All downloads are free. Search and download functionality uses the official Maven repository.

eu.clarussecure.dataoperations.splitting.Functions Maven / Gradle / Ivy

The newest version!
package eu.clarussecure.dataoperations.splitting;

import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.PrecisionModel;
import com.vividsolutions.jts.io.ParseException;
import com.vividsolutions.jts.io.WKBReader;
import com.vividsolutions.jts.io.WKBWriter;
import eu.clarussecure.dataoperations.AttributeNamesUtilities;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

public class Functions {

    /**
     * Applies the protection configured for identifier attributes to a dataset.
     *
     * <p>The attribute lists held in {@code Record} are first re-ordered according to
     * {@code attributes}. Splitting is the only protection handled by this method.
     *
     * @param attributes attribute names used to re-order the lists held in {@code Record}
     * @param content the dataset to protect
     * @return the result of {@link #splitting(String[][])}, or {@code null} when the
     *         identifier attribute type is not protected by splitting
     */
    public static String[][][] anonymize(String[] attributes, String[][] content) {
        reOrderListsAccordingAttributeParameter(attributes);

        // Constant-first comparison: when the policy declares no identifier entry the
        // map lookup yields null, which must not raise a NullPointerException.
        String identifierProtection = Record.attrTypes.get(Constants.identifier);
        if (Constants.splitting.equalsIgnoreCase(identifierProtection)) {
            return splitting(content);
        }

        return null;
    }

    /**
     * Rebuilds the geometry column of a dataset from its two split versions.
     *
     * <p>For every row, the x value is read from the geometry stored in
     * {@code strings[0]} and the y value from the geometry stored in
     * {@code strings[1]}; a point built from both replaces the geometry in
     * {@code strings[0]}. A row whose geometry cannot be parsed is left untouched and
     * the stack trace is printed.
     *
     * @param attributeNames attribute names used to re-order the lists held in {@code Record}
     * @param strings the split versions of the dataset; only index 0 and 1 are read
     * @return {@code strings[0]} itself, modified in place
     */
    public static String[][] retrieve(String attributeNames[], String[][][] strings) {
        // A SRID cannot be given to a GeometryFactory without also passing a
        // PrecisionModel.
        GeometryFactory pointFactory = new GeometryFactory(new PrecisionModel(), 4326);
        // Parses objects in WKB (Well Known Binary) format.
        WKBReader wkbReader = new WKBReader();
        WKBWriter wkbWriter = new WKBWriter(2, 2, true);

        System.out.println("Retrieving...");
        reOrderListsAccordingAttributeParameter(attributeNames);

        // Position of the geometric object among the attributes (0 when none is found).
        int geomColumn = 0;
        for (int col = 0; col < Record.numAttr; col++) {
            String kind = Record.listAttrTypes.get(col);
            if (!kind.equalsIgnoreCase(Constants.identifier)) {
                continue;
            }
            String storedAs = Record.listDataTypes.get(col);
            if (storedAs.equalsIgnoreCase(Constants.geometricObject)) {
                geomColumn = col;
                break;
            }
        }

        String[][] merged = strings[0];
        for (int row = 0; row < merged.length; row++) {
            try {
                String hexWithX = strings[0][row][geomColumn];
                Coordinate carriesX = wkbReader.read(WKBReader.hexToBytes(hexWithX)).getCoordinate();
                String hexWithY = strings[1][row][geomColumn];
                Coordinate carriesY = wkbReader.read(WKBReader.hexToBytes(hexWithY)).getCoordinate();
                Geometry rebuilt = pointFactory.createPoint(new Coordinate(carriesX.x, carriesY.y));
                merged[row][geomColumn] = WKBWriter.toHex(wkbWriter.write(rebuilt));
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }

        return merged;
    }

    /**
     * Rebuilds the working attribute lists of {@code Record} so that they follow the
     * order of {@code attributes}.
     *
     * <p>Each attribute is matched against the reference name patterns; the first
     * matching reference entry supplies its name, pattern, attribute type and data
     * type. An attribute matching no pattern is registered under its own name as a
     * non-confidential, categorical attribute.
     *
     * @param attributes attribute names, in the order the working lists must follow
     */
    public static void reOrderListsAccordingAttributeParameter(String[] attributes) {
        ArrayList<String> newListNames = new ArrayList<>();
        // AKKA fix: use pattern for attribute matching
        ArrayList<Pattern> newListNamePatterns = new ArrayList<>();
        ArrayList<String> newListAttrTypes = new ArrayList<>();
        ArrayList<String> newListDataTypes = new ArrayList<>();

        for (String attr : attributes) {
            boolean ok = false;
            // AKKA fix: take refListNames, refListNamePatterns, refListAttrTypes and refListDataTypes as reference
            for (int j = 0; j < Record.refListNames.size(); j++) {
                Pattern pattern = Record.refListNamePatterns.get(j);
                if (pattern.matcher(attr).matches()) {
                    newListNames.add(Record.refListNames.get(j));
                    newListNamePatterns.add(pattern);
                    newListAttrTypes.add(Record.refListAttrTypes.get(j));
                    newListDataTypes.add(Record.refListDataTypes.get(j));
                    ok = true;
                    break;
                }
            }
            if (!ok) {
                // This attribute does not appear in the security policy: it is added
                // as categorical non_confidential.
                newListNames.add(attr);
                // AKKA fix: use pattern for attribute matching
                newListNamePatterns.add(Pattern.compile(AttributeNamesUtilities.escapeRegex(attr)));
                newListAttrTypes.add(Constants.non_confidential);
                newListDataTypes.add(Constants.categoric);
            }
        }

        Record.listNames = newListNames;
        // AKKA fix: use pattern for attribute matching
        Record.listNamePatterns = newListNamePatterns;
        Record.listAttrTypes = newListAttrTypes;
        Record.listDataTypes = newListDataTypes;
        Record.numAttr = newListNames.size();
    }

    /**
     * This function applies splitting to a dataset
     *
     * @param dataOri,
     *            the dataset
     * @return two anonymized versions of the dataset
     */
    public static String[][][] splitting(String[][] dataOri) {
        // devolver lista de hashmaps
        ArrayList data;
        ArrayList> dataAnom;
        String[][][] dataAnomStr;

        data = createRecords(dataOri);
        dataAnom = splitting(data);
        dataAnomStr = createMatrixStringFromRecords(dataAnom);

        return dataAnomStr;
    }

    public static ArrayList> splitting(ArrayList dataOri) {
        ArrayList> dataAnom = new ArrayList>();
        ArrayList geometricObjects = new ArrayList();
        ArrayList geometricObjectsX = new ArrayList();
        ArrayList geometricObjectsY = new ArrayList();
        PrecisionModel pmodel = new PrecisionModel(); // No podem especificar un
                                                      // SRID al GeometryFactory
                                                      // sense passarli un
                                                      // PrecisionModel
        GeometryFactory builder = new GeometryFactory(pmodel, 4326); // GeometryFactory
                                                                     // crea
                                                                     // objectes
                                                                     // geometrics
                                                                     // de gis
        WKBReader reader = new WKBReader(); // Parseja objectes en format WKB
                                            // (Well Known Binary)
        WKBWriter writer = new WKBWriter(2, 2, true); // Converteix objectes de
                                                      // GeoTools
        Geometry geom; // Objecte geometric basic
        int posGeom;
        String attrType, dataType, geomStr, value;
        Record record, recordX, recordY;
        Random rnd = new Random();
        Coordinate coordX, coordY;

        System.out.println("Splitting...");
        posGeom = 0;
        for (int i = 0; i < Record.numAttr; i++) { // geometric_object position
            attrType = Record.listAttrTypes.get(i);
            if (attrType.equalsIgnoreCase(Constants.identifier)) {
                dataType = Record.listDataTypes.get(i);
                if (dataType.equalsIgnoreCase(Constants.geometricObject)) {
                    posGeom = i;
                    break;
                }
            }
        }

        for (Record reg : dataOri) {
            geomStr = reg.attrValues[posGeom];
            geometricObjects.add(geomStr);
        }

        for (String s : geometricObjects) {
            try {
                geom = reader.read(WKBReader.hexToBytes(s));
                geom.getCoordinate().y = rnd.nextInt(180) - 90;
                // coordX = new Coordinate(geom.getCoordinate().x,
                // rnd.nextInt(180) - 90, geom.getCoordinate().z);
                // coordY = new Coordinate(rnd.nextInt(180) - 90,
                // geom.getCoordinate().y, geom.getCoordinate().z);
                // geom = builder.createPoint(coordX);
                geometricObjectsX.add(WKBWriter.toHex(writer.write(geom)));
                geom = reader.read(WKBReader.hexToBytes(s));
                geom.getCoordinate().x = rnd.nextInt(180) - 90;
                // geom = builder.createPoint(coordY);
                geometricObjectsY.add(WKBWriter.toHex(writer.write(geom)));
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
        dataAnom.add(new ArrayList()); // X
        dataAnom.add(new ArrayList()); // Y
        for (int i = 0; i < dataOri.size(); i++) {
            record = dataOri.get(i);
            recordX = new Record(record.id);
            recordY = new Record(record.id);
            for (int j = 0; j < Record.numAttr; j++) {
                value = record.attrValues[j];
                if (j == posGeom) {
                    recordX.attrValues[j] = geometricObjectsX.get(i);
                    recordY.attrValues[j] = geometricObjectsY.get(i);
                } else {
                    recordY.attrValues[j] = value; // ojo encriptar
                                                   // confidenciales
                    recordX.attrValues[j] = value; // ojo encriptar
                                                   // confidenciales
                }
            }
            dataAnom.get(0).add(recordX);
            dataAnom.get(1).add(recordY);
        }
        System.out.println("done");

        return dataAnom;
    }

    /**
     * Parses an XML string and loads the properties it describes.
     *
     * @param xml the XML text to parse
     */
    public static void readProperties(String xml) {
        readProperties(readDocument(xml));
    }

    /**
     * Parses the XML file at the given path into a normalized DOM document.
     *
     * @param fileProperties path of the XML file
     * @return the parsed document, or {@code null} when parsing failed (the stack
     *         trace is printed)
     */
    private static Document readDocumentFromFile(String fileProperties) {
        try {
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document parsed = parser.parse(new File(fileProperties));
            parsed.getDocumentElement().normalize();
            return parsed;
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Parses XML text into a normalized DOM document.
     *
     * @param xml the XML text
     * @return the parsed document, or {@code null} when parsing failed (the stack
     *         trace is printed)
     */
    private static Document readDocument(String xml) {
        try {
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document parsed = parser.parse(new InputSource(new StringReader(xml)));
            parsed.getDocumentElement().normalize();
            return parsed;
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Parses the bytes of an XML document into a normalized DOM document.
     *
     * <p>The bytes are handed to the parser as a byte stream, so the character
     * encoding is taken from the document itself (byte order mark or XML
     * declaration) rather than from the platform default charset.
     *
     * @param xml the encoded XML document
     * @return the parsed document, or {@code null} when parsing failed (the stack
     *         trace is printed)
     */
    public static Document readDocument(byte[] xml) {
        Document document = null;

        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            // Decoding with new String(xml) would apply the platform charset and
            // ignore the encoding declared in the XML prolog.
            InputSource is = new InputSource(new ByteArrayInputStream(xml));
            document = db.parse(is);
            document.getDocumentElement().normalize();
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }

        return document;
    }

    /**
     * Loads the protection parameters and the attribute lists of {@code Record} from
     * a parsed policy document.
     *
     * <p>Protection-specific parameters (k, t, clouds, splitting type, key id,
     * coarsening type, radius) are read only when the corresponding protection
     * appears among the attribute types. The attribute names, patterns, attribute
     * types and data types are stored both as the working lists and as the reference
     * lists of {@code Record}.
     *
     * @param document the parsed policy document
     */
    public static void readProperties(Document document) {
        // URV fix: removed header, attribute_separator and record_separator
        // URV fix: elements are retrieved by their name

        Record.attrTypes = getAttributeTypes(document);
        for (String s : Record.attrTypes.values()) {
            if (s.equalsIgnoreCase(Constants.kAnonymity)) {
                Record.k = Integer.parseInt(getK(document));
            }
            if (s.equalsIgnoreCase(Constants.tCloseness)) {
                Record.t = Double.parseDouble(getT(document));
            }
            if (s.equalsIgnoreCase(Constants.splitting)) {
                Record.clouds = Integer.parseInt(getClouds(document));
                Record.splittingType = getSplittingType(document);
            }
            if (s.equalsIgnoreCase(Constants.encryption)) {
                Record.idKey = getIdKey(document);
            }
            if (s.equalsIgnoreCase(Constants.coarsening)) {
                Record.coarsening_type = getCoarseningType(document);
                if (Record.coarsening_type.equalsIgnoreCase(Constants.shift)) {
                    Record.radius = Double.parseDouble(getRadius(document));
                }
                if (Record.coarsening_type.equalsIgnoreCase(Constants.microaggregation)) {
                    Record.k = Integer.parseInt(getCoarseningK(document));
                }
            }
        }

        // AKKA fix: replace unqualified attribute name by a generic qualified
        // one (with asterisks):
        List<String> attributeNames = getAtributeNames(document);
        attributeNames = AttributeNamesUtilities.fullyQualified(attributeNames);
        // Collect straight into ArrayLists: neither Collectors.toList() nor the
        // utility method promises an ArrayList, so casting their results is unsafe.
        ArrayList<Pattern> attributePatterns = attributeNames.stream()
                .map(AttributeNamesUtilities::escapeRegex)
                .map(Pattern::compile)
                .collect(Collectors.toCollection(ArrayList::new));
        ArrayList<String> names = new ArrayList<>(attributeNames);

        // AKKA fix: keep original listNames, listNamePatterns and listAttrTypes
        Record.refListNames = Record.listNames = names;
        Record.refListNamePatterns = Record.listNamePatterns = attributePatterns;
        Record.refListAttrTypes = Record.listAttrTypes = getAtributeTypes(document);

        int numQuasis = 0;
        for (String s : Record.listAttrTypes) {
            if (s.equals(Constants.quasiIdentifier)) {
                numQuasis++;
            }
        }
        Record.numQuasi = numQuasis;
        if (Record.numQuasi == 0) {
            Record.attrTypes.put(Constants.quasiIdentifier, "null");
        }

        // AKKA fix: keep original listDataTypes and numAttr
        Record.refListDataTypes = Record.listDataTypes = getAttributeDataTypes(document);
        Record.refNumAttr = Record.numAttr = Record.listAttrTypes.size();
    }

    /**
     * Maps every attribute type declared in the document to its protection.
     *
     * <p>One entry is created per element named {@code Constants.attributeType},
     * keyed by its {@code Constants.type} attribute and holding its
     * {@code Constants.protection} attribute. A later element with the same type
     * overwrites an earlier one.
     *
     * @param document the parsed policy document
     * @return the protection of each attribute type
     */
    private static HashMap<String, String> getAttributeTypes(Document document) {
        HashMap<String, String> attrTypes = new HashMap<>();

        // URV fix: elements are retrieved by their name
        NodeList nodeList = document.getElementsByTagName(Constants.attributeType);

        for (int i = 0; i < nodeList.getLength(); i++) {
            NamedNodeMap attributes = nodeList.item(i).getAttributes();
            String type = attributes.getNamedItem(Constants.type).getNodeValue();
            String protection = attributes.getNamedItem(Constants.protection).getNodeValue();
            attrTypes.put(type, protection);
        }

        return attrTypes;
    }

    /**
     * Reads the {@code Constants.k} attribute of the first attribute-type element
     * whose protection is k-anonymity.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getK(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.kAnonymity)) {
                return attrs.getNamedItem(Constants.k).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Reads the {@code Constants.t} attribute of the first attribute-type element
     * whose protection is t-closeness.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getT(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.tCloseness)) {
                return attrs.getNamedItem(Constants.t).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Reads the {@code Constants.clouds} attribute of the first attribute-type
     * element whose protection is splitting.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getClouds(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.splitting)) {
                return attrs.getNamedItem(Constants.clouds).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Reads the {@code Constants.splittingType} attribute of the first
     * attribute-type element whose protection is splitting.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getSplittingType(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.splitting)) {
                return attrs.getNamedItem(Constants.splittingType).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Reads the {@code Constants.id_key} attribute of the first attribute-type
     * element whose protection is encryption.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getIdKey(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.encryption)) {
                return attrs.getNamedItem(Constants.id_key).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Reads the {@code Constants.radius} attribute of the first attribute-type
     * element whose protection is coarsening.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getRadius(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.coarsening)) {
                return attrs.getNamedItem(Constants.radius).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Reads the {@code Constants.coarseningType} attribute of the first
     * attribute-type element whose protection is coarsening.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getCoarseningType(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.coarsening)) {
                return attrs.getNamedItem(Constants.coarseningType).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Reads the {@code Constants.k} attribute of the first attribute-type element
     * whose protection is coarsening.
     *
     * @param document the parsed policy document
     * @return the attribute value, or {@code null} when no element has that protection
     */
    private static String getCoarseningK(Document document) {
        // URV fix: elements are retrieved by their name
        NodeList attributeTypes = document.getElementsByTagName(Constants.attributeType);

        for (int idx = 0; idx < attributeTypes.getLength(); idx++) {
            NamedNodeMap attrs = attributeTypes.item(idx).getAttributes();
            String protection = attrs.getNamedItem(Constants.protection).getNodeValue();
            if (protection.equalsIgnoreCase(Constants.coarsening)) {
                return attrs.getNamedItem(Constants.k).getNodeValue();
            }
        }

        return null;
    }

    /**
     * Collects the {@code Constants.name} attribute of every element named
     * {@code Constants.attribute}, in document order.
     *
     * @param document the parsed policy document
     * @return the attribute names
     */
    private static ArrayList<String> getAtributeNames(Document document) {
        ArrayList<String> names = new ArrayList<>();

        // URV fix: elements are retrieved by their name
        NodeList nodeList = document.getElementsByTagName(Constants.attribute);

        for (int i = 0; i < nodeList.getLength(); i++) {
            NamedNodeMap attributes = nodeList.item(i).getAttributes();
            names.add(attributes.getNamedItem(Constants.name).getNodeValue());
        }

        return names;
    }

    /**
     * Collects the {@code Constants.attributeType} attribute of every element named
     * {@code Constants.attribute}, in document order.
     *
     * @param document the parsed policy document
     * @return the attribute type of each attribute
     */
    private static ArrayList<String> getAtributeTypes(Document document) {
        ArrayList<String> attrTypes = new ArrayList<>();

        // URV fix: elements are retrieved by their name
        NodeList nodeList = document.getElementsByTagName(Constants.attribute);

        for (int i = 0; i < nodeList.getLength(); i++) {
            NamedNodeMap attributes = nodeList.item(i).getAttributes();
            attrTypes.add(attributes.getNamedItem(Constants.attributeType).getNodeValue());
        }

        return attrTypes;
    }

    /**
     * Collects the {@code Constants.dataType} attribute of every element named
     * {@code Constants.attribute}, in document order.
     *
     * @param document the parsed policy document
     * @return the data type of each attribute; an empty string for an element that
     *         does not declare one
     */
    private static ArrayList<String> getAttributeDataTypes(Document document) {
        ArrayList<String> dataTypes = new ArrayList<>();

        // URV fix: elements are retrieved by their name
        NodeList nodeList = document.getElementsByTagName(Constants.attribute);

        for (int i = 0; i < nodeList.getLength(); i++) {
            Node dataType = nodeList.item(i).getAttributes().getNamedItem(Constants.dataType);
            dataTypes.add(dataType == null ? "" : dataType.getNodeValue());
        }

        return dataTypes;
    }

    /**
     * Builds records from a dataset given as a single string.
     *
     * <p>The text is cut into records with {@code Record.recordSeparator} and each
     * record into attribute values with {@code Record.attributeSeparator}. The first
     * {@code Record.numAttr} values of each record are kept. Records are numbered
     * from 0 in order of appearance.
     *
     * @param data the dataset text
     * @return the records, in order of appearance
     */
    public static ArrayList<Record> createRecords(String data) {
        ArrayList<Record> records = new ArrayList<>();

        String[] recordsStr = data.split(Record.recordSeparator);
        for (int i = 0; i < recordsStr.length; i++) {
            // A negative limit keeps trailing empty values; without it a record
            // ending in empty attributes yields too few fields and the copy below
            // runs past the end of the array.
            String[] fields = recordsStr[i].split(Record.attributeSeparator, -1);
            Record record = new Record(i);
            for (int j = 0; j < Record.numAttr; j++) {
                record.attrValues[j] = fields[j];
            }
            records.add(record);
        }

        System.out.println("Records loaded: " + records.size());
        return records;
    }

    /**
     * Builds one record per row of a dataset.
     *
     * <p>Records are numbered from 0 in row order and every value of a row is copied
     * into the attribute values of its record.
     *
     * @param data the dataset
     * @return the records, in row order
     */
    public static ArrayList<Record> createRecords(String[][] data) {
        ArrayList<Record> records = new ArrayList<>(data.length);

        for (int i = 0; i < data.length; i++) {
            Record record = new Record(i);
            for (int j = 0; j < data[i].length; j++) {
                record.attrValues[j] = data[i][j];
            }
            records.add(record);
        }

        System.out.println("Records loaded: " + records.size());
        return records;
    }

    public static String[][][] createMatrixStringFromRecords(ArrayList> records) {
        String data[][][];
        String dataTemp[][];
        Record record;
        ArrayList dataList;

        dataList = records.get(0);
        dataTemp = new String[dataList.size()][];
        for (int i = 0; i < dataList.size(); i++) {
            record = dataList.get(i);
            dataTemp[i] = record.toVectorString();
        }

        data = new String[2][dataTemp.length][];
        data[0] = dataTemp;

        dataList = records.get(1);
        dataTemp = new String[dataList.size()][];
        for (int i = 0; i < dataList.size(); i++) {
            record = dataList.get(i);
            dataTemp[i] = record.toVectorString();
        }
        data[1] = dataTemp;

        System.out.println(data.length + " records converted to String matrix");
        return data;
    }

    @Deprecated
    public static void writeFile(ArrayList> data) {
        File file;
        FileWriter fw;
        BufferedWriter bw;
        String fileName;
        int cont;

        for (int i = 0; i < data.size(); i++) {
            cont = 0;
            if (Record.header) {
                addCabecera(data.get(i));
                cont = -1;
            }
            fileName = "data_clarus_anom_" + (i + 1) + ".txt";
            file = new File(fileName);
            try {
                fw = new FileWriter(file);
                bw = new BufferedWriter(fw);
                for (Record r : data.get(i)) {
                    bw.write(r.toString());
                    bw.newLine();
                    cont++;
                }
                bw.close();
                fw.close();

                System.out.println("Records saved: " + cont);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }

    /**
     * Inserts a header record at the front of a record list.
     *
     * <p>The header is a record with id 0 whose attribute values are the names in
     * {@code Record.listNames}.
     *
     * @param lista the list to modify
     */
    private static void addCabecera(ArrayList<Record> lista) {
        Record header = new Record(0);
        for (int i = 0; i < Record.listNames.size(); i++) {
            header.attrValues[i] = Record.listNames.get(i);
        }
        lista.add(0, header);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy