All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.gitlab.chaver.mining.patterns.util.PatternUtil Maven / Gradle / Ivy

There is a newer version: 1.0.1
Show newest version
/*
 * This file is part of io.gitlab.chaver:data-mining (https://gitlab.com/chaver/data-mining)
 *
 * Copyright (c) 2022, IMT Atlantique
 *
 * Licensed under the MIT license.
 *
 * See LICENSE file in the project root for full license information.
 */
package io.gitlab.chaver.mining.patterns.util;

import io.gitlab.chaver.mining.patterns.io.Database;
import io.gitlab.chaver.mining.patterns.io.Pattern;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;

public class PatternUtil {

    /**
     * Read a stream of patterns in .txt format
     * @param stream input stream of skypatterns
     * @return The list of patterns contained in the stream
     */
    public static List readPatternStream(InputStream stream) throws IOException {
        List skypatterns = new LinkedList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty()) continue;
                if (line.startsWith("-")) break;
                String[] lineSplit = line.split(":");
                String[] itemString = lineSplit[0].split(" ");
                int[] itemNames = new int[itemString.length];
                for (int i = 0; i < itemString.length; i++) {
                    itemNames[i] = Integer.parseInt(itemString[i]);
                }
                int[] measures = new int[lineSplit.length - 1];
                for (int i = 0; i < lineSplit.length - 1; i++) {
                    String currentMeasure = lineSplit[i + 1].replaceAll("\\s", "");
                    measures[i] = Integer.parseInt(currentMeasure);
                }
                skypatterns.add(new Pattern(itemNames, measures));
            }
        }
        return skypatterns;
    }

    /**
     * Find closed pattern (w.r.t. {freq}) of p
     * @param p pattern
     * @param database database
     * @return the closed pattern associated to p
     */
    public static int[] findClosedPattern(Pattern p, Database database) {
        BitSet cover = new BitSet(database.getNbTransactions());
        Map itemIndexes = database.getItemsMap();
        cover.set(0, database.getNbTransactions());
        for (int i : p.getItems()) {
            cover.and(database.getVerticalRepresentation()[itemIndexes.get(i)]);
        }
        Set closure = new TreeSet<>();
        for (int i = database.getNbClass(); i < database.getNbItems(); i++) {
            BitSet temp = (BitSet) cover.clone();
            temp.and(database.getVerticalRepresentation()[i]);
            if (temp.cardinality() == cover.cardinality()) {
                closure.add(database.getItems()[i]);
            }
        }
        return closure.stream().mapToInt(Integer::intValue).toArray();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy