All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.sonar.api.batch.fs.internal.FileMetadata Maven / Gradle / Ivy

There is a newer version: 9.4.0.54424
Show newest version
/*
 * SonarQube
 * Copyright (C) 2009-2016 SonarSource SA
 * mailto:contact AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.api.batch.fs.internal;

import com.google.common.primitives.Ints;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.CheckForNull;
import javax.annotation.Nullable;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;
import org.sonar.api.CoreProperties;
import org.sonar.api.batch.ScannerSide;
import org.sonar.api.utils.log.Logger;
import org.sonar.api.utils.log.Loggers;

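/**
 * Computes metadata of source files: number of lines and of non-blank lines,
 * a hash of the whole content, the offset of each line and per-line hashes.
 * Ends of lines are ignored when hashing, so files with the same content but
 * different EOL encoding have the same hash.
 */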
@ScannerSide
public class FileMetadata {

  // Logger of this class; LineCounter uses it to warn about undecodable characters.
  private static final Logger LOG = Loggers.get(FileMetadata.class);

  // End-of-line characters recognized by read(): a line ends on LF, on CR LF or on a lone CR.
  private static final char LINE_FEED = '\n';
  private static final char CARRIAGE_RETURN = '\r';

  /**
   * Set of callbacks notified while a stream of characters is being read.
   * Every hook does nothing by default, so subclasses override only the
   * events they are interested in.
   */
  public abstract static class CharHandler {

    /**
     * Called for each character that is neither a line feed nor a carriage return.
     * For such a character this hook is invoked before {@link #handleAll(char)}.
     */
    protected void handleIgnoreEoL(char c) {
      // no-op by default
    }

    /**
     * Called for each character read, end-of-line characters included.
     */
    protected void handleAll(char c) {
      // no-op by default
    }

    /**
     * Called once per line terminator: LF, CR LF or a CR that is not followed by LF.
     */
    protected void newLine() {
      // no-op by default
    }

    /**
     * Called once, after the last character has been handled.
     */
    protected void eof() {
      // no-op by default
    }
  }

  /**
   * Counts the lines and the non-blank lines of the content being read, and
   * warns (at most once per instance) when the Unicode replacement character
   * U+FFFD shows up in the decoded content.
   */
  private static class LineCounter extends CharHandler {
    private final File file;
    private final Charset encoding;
    // Number of the line currently being read; the first line is 1.
    private int lines = 1;
    private int nonBlankLines = 0;
    // True until a non-whitespace character is seen on the current line.
    private boolean blankLine = true;
    boolean alreadyLoggedInvalidCharacter = false;

    LineCounter(File file, Charset encoding) {
      this.file = file;
      this.encoding = encoding;
    }

    /**
     * Logs a single warning the first time the replacement character is encountered.
     */
    @Override
    protected void handleAll(char c) {
      if (alreadyLoggedInvalidCharacter || c != '\ufffd') {
        return;
      }
      LOG.warn("Invalid character encountered in file {} at line {} for encoding {}. Please fix file content or configure the encoding to be used using property '{}'.", file,
        lines, encoding, CoreProperties.ENCODING_PROPERTY);
      alreadyLoggedInvalidCharacter = true;
    }

    /**
     * Marks the current line as non-blank as soon as a non-whitespace character is seen.
     */
    @Override
    protected void handleIgnoreEoL(char c) {
      if (Character.isWhitespace(c)) {
        return;
      }
      blankLine = false;
    }

    /**
     * Closes the current line and starts a new, so far blank, one.
     */
    @Override
    protected void newLine() {
      lines++;
      countCurrentLineIfNotBlank();
      blankLine = true;
    }

    /**
     * Accounts for the last line, which has no terminator.
     */
    @Override
    protected void eof() {
      countCurrentLineIfNotBlank();
    }

    // Adds the line being closed to the non-blank total when it held visible content.
    private void countCurrentLineIfNotBlank() {
      if (!blankLine) {
        nonBlankLines++;
      }
    }

    /**
     * @return the number of lines seen so far (1 for empty content)
     */
    public int lines() {
      return lines;
    }

    /**
     * @return the number of closed lines that contained at least one non-whitespace character
     */
    public int nonBlankLines() {
      return nonBlankLines;
    }

  }

  /**
   * Computes the MD5 hash of the whole content, encoded as UTF-8, with every
   * line terminator replaced by a single line feed. The content is fed to the
   * digest one line at a time.
   */
  private static class FileHashComputer extends CharHandler {
    private final MessageDigest globalMd5Digest = DigestUtils.getMd5Digest();
    // Characters of the current line that have not been digested yet.
    private final StringBuilder sb = new StringBuilder();
    private final CharsetEncoder encoder;
    private final File file;

    public FileHashComputer(File f) {
      this.file = f;
      this.encoder = StandardCharsets.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
    }

    @Override
    protected void handleIgnoreEoL(char c) {
      sb.append(c);
    }

    /**
     * Terminates the pending line with a line feed, whatever the original
     * terminator was, then digests it and clears the buffer.
     */
    @Override
    protected void newLine() {
      sb.append(LINE_FEED);
      processBuffer();
      sb.setLength(0);
    }

    /**
     * Digests the trailing characters of a last line without terminator, if any.
     */
    @Override
    protected void eof() {
      // processBuffer() ignores an empty buffer, so no extra check is needed here.
      processBuffer();
    }

    // Encodes the pending characters as UTF-8 and adds the bytes to the digest.
    private void processBuffer() {
      if (sb.length() == 0) {
        return;
      }
      try {
        ByteBuffer utf8Bytes = encoder.encode(CharBuffer.wrap(sb));
        // Consumes the bytes between position and limit, i.e. exactly the encoded output.
        globalMd5Digest.update(utf8Bytes);
      } catch (CharacterCodingException e) {
        throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e);
      }
    }

    /**
     * @return the hexadecimal representation of the MD5 digest of everything processed so far
     */
    @CheckForNull
    public String getHash() {
      byte[] md5 = globalMd5Digest.digest();
      return Hex.encodeHexString(md5);
    }
  }

  /**
   * Computes one MD5 hash per line, over the UTF-8 encoding of the line with
   * all whitespace characters removed, and hands each hash to a
   * {@link LineHashConsumer}. Lines left empty once whitespace is removed are
   * not reported.
   */
  private static class LineHashComputer extends CharHandler {
    private final MessageDigest lineMd5Digest = DigestUtils.getMd5Digest();
    private final CharsetEncoder encoder;
    // Non-whitespace characters of the line currently being read.
    private final StringBuilder sb = new StringBuilder();
    private final LineHashConsumer consumer;
    private final File file;
    // Number of the line currently being read; the first line is 1.
    private int line = 1;

    public LineHashComputer(LineHashConsumer consumer, File f) {
      this.encoder = StandardCharsets.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
      this.consumer = consumer;
      this.file = f;
    }

    /**
     * Keeps the character only when it is not whitespace.
     */
    @Override
    protected void handleIgnoreEoL(char c) {
      if (Character.isWhitespace(c)) {
        return;
      }
      sb.append(c);
    }

    /**
     * Reports the hash of the line that just ended and moves on to the next one.
     */
    @Override
    protected void newLine() {
      processBuffer();
      sb.setLength(0);
      line++;
    }

    /**
     * Reports the hash of the last line, which has no terminator.
     */
    @Override
    protected void eof() {
      if (this.line > 0) {
        processBuffer();
      }
    }

    // Hashes the buffered characters and notifies the consumer; an empty buffer is skipped.
    private void processBuffer() {
      if (sb.length() == 0) {
        return;
      }
      ByteBuffer utf8Bytes;
      try {
        utf8Bytes = encoder.encode(CharBuffer.wrap(sb));
      } catch (CharacterCodingException e) {
        throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e);
      }
      // Consumes the bytes between position and limit, i.e. exactly the encoded output.
      lineMd5Digest.update(utf8Bytes);
      // digest() also resets the MessageDigest, making it ready for the next line.
      consumer.consume(line, lineMd5Digest.digest());
    }
  }

  /**
   * Records the offset at which each line starts. Offsets are counted in
   * {@code char}s from the beginning of the content and include the
   * end-of-line characters of the preceding lines.
   */
  private static class LineOffsetCounter extends CharHandler {
    // Start offset of every line seen so far; the first line always starts at 0.
    private final List<Integer> originalLineOffsets = new ArrayList<>();
    // Number of chars read so far, end-of-line characters included.
    private int currentOriginalOffset = 0;
    // Total number of chars read; only set once eof() has been called.
    private int lastValidOffset = 0;

    public LineOffsetCounter() {
      originalLineOffsets.add(0);
    }

    @Override
    protected void handleAll(char c) {
      currentOriginalOffset++;
    }

    /**
     * Registers the current offset as the start of the next line.
     */
    @Override
    protected void newLine() {
      originalLineOffsets.add(currentOriginalOffset);
    }

    @Override
    protected void eof() {
      lastValidOffset = currentOriginalOffset;
    }

    /**
     * @return the start offset of each line, in reading order; this is the
     *         internal list, not a copy
     */
    public List<Integer> getOriginalLineOffsets() {
      return originalLineOffsets;
    }

    /**
     * @return the number of chars read, or 0 as long as {@link #eof()} has not been called
     */
    public int getLastValidOffset() {
      return lastValidOffset;
    }

  }

  /**
   * Reads the given file once and computes its metadata: number of lines,
   * number of non-blank lines, hash of the content (insensitive to the kind
   * of line terminators used), offset of each line and total length.
   * Maximum performance is needed.
   *
   * @param file the file to read
   * @param encoding the charset used to decode the file
   * @return the computed metadata
   */
  public Metadata readMetadata(File file, Charset encoding) {
    LineCounter counter = new LineCounter(file, encoding);
    FileHashComputer hasher = new FileHashComputer(file);
    LineOffsetCounter offsets = new LineOffsetCounter();

    // A single pass over the file feeds the three handlers.
    readFile(file, encoding, counter, hasher, offsets);

    return new Metadata(
      counter.lines(),
      counter.nonBlankLines(),
      hasher.getHash(),
      offsets.getOriginalLineOffsets(),
      offsets.getLastValidOffset());
  }

  /**
   * Computes the metadata of content provided by a reader instead of a file.
   * For testing purpose.
   *
   * @param reader the source of characters; it is not closed by this method
   * @return the computed metadata
   * @throws IllegalStateException if the reader fails with an {@link IOException}
   */
  public Metadata readMetadata(Reader reader) {
    // There is no real file here: this placeholder only appears in log and error messages.
    File placeholder = new File("fromString");
    LineCounter counter = new LineCounter(placeholder, StandardCharsets.UTF_16);
    FileHashComputer hasher = new FileHashComputer(new File("fromString"));
    LineOffsetCounter offsets = new LineOffsetCounter();

    try {
      read(reader, counter, hasher, offsets);
    } catch (IOException e) {
      throw new IllegalStateException("Should never occurs", e);
    }

    return new Metadata(
      counter.lines(),
      counter.nonBlankLines(),
      hasher.getHash(),
      offsets.getOriginalLineOffsets(),
      offsets.getLastValidOffset());
  }

  /**
   * Decodes the given file with the given charset and notifies the handlers
   * of every character, line terminator and of the end of the content.
   * The file is wrapped in a {@link BOMInputStream} configured for the UTF-8,
   * UTF-16 and UTF-32 byte order marks; the charset used for decoding is always
   * the one given, whatever mark is found.
   * NOTE(review): with this constructor commons-io is expected to strip a
   * detected BOM from the stream - confirm against the library version in use.
   *
   * @param file the file to read
   * @param encoding the charset used to decode the file
   * @param handlers the callbacks to notify, in order
   * @throws IllegalStateException if the file cannot be opened or read
   */
  public static void readFile(File file, Charset encoding, CharHandler... handlers) {
    try (
      BOMInputStream bomStream = new BOMInputStream(
        new FileInputStream(file),
        ByteOrderMark.UTF_8,
        ByteOrderMark.UTF_16LE,
        ByteOrderMark.UTF_16BE,
        ByteOrderMark.UTF_32LE,
        ByteOrderMark.UTF_32BE);
      Reader decoded = new BufferedReader(new InputStreamReader(bomStream, encoding))) {
      read(decoded, handlers);
    } catch (IOException e) {
      String message = String.format("Fail to read file '%s' with encoding '%s'", file.getAbsolutePath(), encoding);
      throw new IllegalStateException(message, e);
    }
  }

  /**
   * Reads the content character by character and dispatches events to the
   * handlers. A line ends on LF, on CR LF (reported as a single line
   * terminator) or on a CR that is not followed by LF.
   *
   * For each character, each handler receives in this order:
   * the pending {@code newLine()} of a preceding lone CR, then
   * {@code handleIgnoreEoL(c)} unless the character is CR or LF, then
   * {@code handleAll(c)}, then {@code newLine()} if the character is LF.
   *
   * @param reader the source of characters; it is not closed by this method
   * @param handlers the callbacks to notify, in order
   * @throws IOException if the reader fails
   */
  private static void read(Reader reader, CharHandler... handlers) throws IOException {
    // True when the previous character was a CR whose newLine() has not been emitted yet.
    boolean pendingCR = false;
    for (int code = reader.read(); code != -1; code = reader.read()) {
      char current = (char) code;
      boolean isCR = current == CARRIAGE_RETURN;
      boolean isLF = current == LINE_FEED;
      for (CharHandler handler : handlers) {
        if (pendingCR && !isLF) {
          // The previous CR was a terminator on its own: close its line first.
          handler.newLine();
        }
        if (!isCR && !isLF) {
          handler.handleIgnoreEoL(current);
        }
        handler.handleAll(current);
        if (isLF) {
          // Closes the line for both LF and CR LF.
          handler.newLine();
        }
      }
      pendingCR = isCR;
    }
    for (CharHandler handler : handlers) {
      if (pendingCR) {
        // The content ends with a CR: it still terminates a line.
        handler.newLine();
      }
      handler.eof();
    }
  }

  /**
   * Immutable holder of the values computed while reading a file.
   */
  public static class Metadata {
    /** Number of lines; 1 for empty content. */
    final int lines;
    /** Number of lines containing at least one non-whitespace character. */
    final int nonBlankLines;
    /** Hexadecimal MD5 hash of the content, insensitive to the kind of line terminators. */
    final String hash;
    /** Start offset of each line, counted in chars from the beginning of the content. */
    final int[] originalLineOffsets;
    /** Total number of chars read. */
    final int lastValidOffset;

    /**
     * @param lines number of lines
     * @param nonBlankLines number of non-blank lines
     * @param hash hash of the content
     * @param originalLineOffsets start offset of each line; copied into an {@code int[]}
     * @param lastValidOffset total number of chars read
     */
    private Metadata(int lines, int nonBlankLines, String hash, List<Integer> originalLineOffsets, int lastValidOffset) {
      this.lines = lines;
      this.nonBlankLines = nonBlankLines;
      this.hash = hash;
      this.originalLineOffsets = Ints.toArray(originalLineOffsets);
      this.lastValidOffset = lastValidOffset;
    }
  }

  /**
   * Receives the hashes computed line by line by
   * {@link #computeLineHashesForIssueTracking(DefaultInputFile, LineHashConsumer)}.
   */
  public interface LineHashConsumer {

    /**
     * Called with the hash of one line.
     *
     * @param lineIdx number of the line; the computation in this class numbers lines from 1
     * @param hash MD5 digest of the line; declared nullable, although the computation
     *        in this class only reports lines having a hash
     */
    void consume(int lineIdx, @Nullable byte[] hash);

  }

  /**
   * Computes an MD5 hash for each line of the file, after all whitespace
   * characters have been removed from the line, and passes each hash to the
   * consumer. Lines left empty once whitespace is removed are not reported.
   *
   * @param f the input file, read with its own charset
   * @param consumer the callback receiving the line numbers and hashes
   */
  public static void computeLineHashesForIssueTracking(DefaultInputFile f, LineHashConsumer consumer) {
    LineHashComputer lineHasher = new LineHashComputer(consumer, f.file());
    readFile(f.file(), f.charset(), lineHasher);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy