All downloads are FREE. Search and download functionality uses the official Maven repository.

org.sonar.duplications.block.BlockChunker Maven / Gradle / Ivy

There is a newer version: 7.0
Show newest version
/*
 * SonarQube
 * Copyright (C) 2009-2017 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.duplications.block;

import java.util.ArrayList;
import org.sonar.duplications.statement.Statement;

import java.util.Collections;
import java.util.List;

/**
 * Creates blocks from statements, each block will contain specified number of statements (blockSize) and 64-bits (8-bytes) hash value.
 * Hash value computed using
 * Rabin-Karp rolling hash :
 * 
 * s[0]*31^(blockSize-1) + s[1]*31^(blockSize-2) + ... + s[blockSize-1]
 * 
* using long arithmetic, where s[i] * is the hash code of String (which is cached) for statement with number i. * Thus running time - O(N), where N - number of statements. * Implementation fully thread-safe. */ public class BlockChunker { private static final long PRIME_BASE = 31; private final int blockSize; private final long power; public BlockChunker(int blockSize) { this.blockSize = blockSize; long pow = 1; for (int i = 0; i < blockSize - 1; i++) { pow = pow * PRIME_BASE; } this.power = pow; } public List chunk(String resourceId, List statements) { List filtered = new ArrayList<>(); int i = 0; while (i < statements.size()) { Statement first = statements.get(i); int j = i + 1; while (j < statements.size() && statements.get(j).getValue().equals(first.getValue())) { j++; } filtered.add(statements.get(i)); if (i < j - 1) { filtered.add(statements.get(j - 1)); } i = j; } statements = filtered; if (statements.size() < blockSize) { return Collections.emptyList(); } Statement[] statementsArr = statements.toArray(new Statement[statements.size()]); List blocks = new ArrayList<>(statementsArr.length - blockSize + 1); long hash = 0; int first = 0; int last = 0; for (; last < blockSize - 1; last++) { hash = hash * PRIME_BASE + statementsArr[last].getValue().hashCode(); } Block.Builder blockBuilder = Block.builder().setResourceId(resourceId); for (; last < statementsArr.length; last++, first++) { Statement firstStatement = statementsArr[first]; Statement lastStatement = statementsArr[last]; // add last statement to hash hash = hash * PRIME_BASE + lastStatement.getValue().hashCode(); // create block Block block = blockBuilder.setBlockHash(new ByteArray(hash)) .setIndexInFile(first) .setLines(firstStatement.getStartLine(), lastStatement.getEndLine()) .build(); blocks.add(block); // remove first statement from hash hash -= power * firstStatement.getValue().hashCode(); } return blocks; } public int getBlockSize() { return blockSize; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy