All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.sonar.plugins.common.NotBinaryFilePredicate Maven / Gradle / Ivy

/*
 * SonarQube Text Plugin
 * Copyright (C) 2021-2024 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the Sonar Source-Available License Version 1, as published by SonarSource SA.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the Sonar Source-Available License for more details.
 *
 * You should have received a copy of the Sonar Source-Available License
 * along with this program; if not, see https://sonarsource.com/license/ssal/
 */
package org.sonar.plugins.common;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.sonar.api.batch.fs.FilePredicate;
import org.sonar.api.batch.fs.InputFile;
import org.sonar.plugins.secrets.api.EntropyChecker;

/**
 * File predicate that accepts an {@link InputFile} only when its name does not look like the name
 * of a binary file.
 *
 * <p>A file is rejected when any of the following holds:
 * <ul>
 *   <li>its extension (text after the last dot, compared case-insensitively) is a known binary extension;</li>
 *   <li>its name ends with one of the known binary suffixes (compared case-sensitively);</li>
 *   <li>its whole name looks like a hexadecimal digest, see {@link #isMd5OrSha1(String)}.</li>
 * </ul>
 *
 * <p>Only the file name is inspected; the file content is never read by this class.
 */
public class NotBinaryFilePredicate implements FilePredicate {

  /** Built-in binary extensions, stored lower-case and without the leading dot. */
  private static final Set<String> DEFAULT_BINARY_EXTENSIONS = new HashSet<>(Arrays.asList(
    "3dm",
    "3ds",
    "3g2",
    "3gp",
    "7z",
    "a",
    "aac",
    "aar",
    "adp",
    "ai",
    "aif",
    "aiff",
    "alz",
    "amr",
    "ape",
    "apk",
    "appimage",
    "ar",
    "arj",
    "asf",
    "at",
    "au",
    "avi",
    "b",
    "bak",
    "baml",
    "bfc",
    "bh",
    "bin",
    "bk",
    "bmp",
    "br",
    "btif",
    "bytes",
    "bz2",
    "bzip2",
    "cab",
    "caf",
    "cer",
    "cfe",
    "cfs",
    "cgm",
    "changesubtype",
    "ckp",
    "class",
    "cmx",
    "cpio",
    "cr2",
    "cur",
    "dat",
    "data",
    "db",
    "dcm",
    "deb",
    "dex",
    "dii",
    "dim",
    "djvu",
    "dll",
    "dmg",
    "dng",
    "doc",
    "docm",
    "docx",
    "dot",
    "dotm",
    "dra",
    "ds_store",
    "dsk",
    "dts",
    "dtshd",
    "dvb",
    "dvd",
    "dvm",
    "dwg",
    "dxf",
    "dylib",
    "ecelp4800",
    "ecelp7470",
    "ecelp9600",
    "egg",
    "enc",
    "eol",
    "eot",
    "eps",
    "epub",
    "exe",
    "exec",
    "f4v",
    "fbs",
    "fdm",
    "fdt",
    "fdx",
    "fe",
    "fh",
    "fla",
    "flac",
    "flatpak",
    "fli",
    "flv",
    "fnm",
    "fpx",
    "fst",
    "fvt",
    "g3",
    "gch",
    "gem",
    "gh",
    "gif",
    "gpg",
    "graffle",
    "gz",
    "gzip",
    "h261",
    "h263",
    "h264",
    "heif",
    "hmap",
    "icns",
    "ico",
    "idx",
    "ief",
    "img",
    "ipa",
    "ir",
    "iso",
    "jar",
    "jce",
    "jks",
    "jnilib",
    "jpeg",
    "jpg",
    "jpgv",
    "jpm",
    "jxr",
    "kdbx",
    "kdd",
    "kdi",
    "kdm",
    "key",
    "keystore",
    "keystream",
    "kjsm",
    "kotlin_module",
    "ktx",
    "ldf",
    "lha",
    "lib",
    "lvp",
    "lz",
    "lzh",
    "lzma",
    "lzo",
    "m3u",
    "m4a",
    "m4v",
    "macho32",
    "macho64",
    "mar",
    "mdf",
    "mdi",
    "meta",
    "mht",
    "mid",
    "midi",
    "mj2",
    "mka",
    "mkv",
    "mmdb",
    "mmr",
    "mng",
    "mo",
    "mobi",
    "mobileprovision",
    "mov",
    "movie",
    "mp3",
    "mp4",
    "mp4a",
    "mpeg",
    "mpg",
    "mpga",
    "mxu",
    "nef",
    "nes",
    "nib",
    "node",
    "npx",
    "npz",
    "numbers",
    "nupkg",
    "nvd",
    "nvm",
    "o",
    "odp",
    "ods",
    "odt",
    "oga",
    "ogg",
    "ogv",
    "otf",
    "ott",
    "p12",
    "pages",
    "pbm",
    "pcap",
    "pch",
    "pcx",
    "pdb",
    "pdf",
    "pea",
    "pf",
    "pfx",
    "pgm",
    "phar",
    "pic",
    "pkcs12",
    "pkenc",
    "pkg",
    "pkl",
    "plaso",
    "plist",
    "png",
    "pnm",
    "pos",
    "pot",
    "potm",
    "potx",
    "ppa",
    "ppam",
    "ppm",
    "pps",
    "ppsm",
    "ppsx",
    "ppt",
    "pptm",
    "pptx",
    "proto",
    "protobuf",
    "ps",
    "psd",
    "pxm",
    "pya",
    "pyc",
    "pyo",
    "pyv",
    "qt",
    "rar",
    "ras",
    "raw",
    "res",
    "resources",
    "rgb",
    "rip",
    "rlc",
    "rmf",
    "rmvb",
    "rpm",
    "rsc",
    "rtf",
    "rz",
    "s3m",
    "s7z",
    "scc",
    "scpt",
    "sgi",
    "shar",
    "si",
    "signature",
    "sil",
    "sketch",
    "slk",
    "smv",
    "snap",
    "snk",
    "so",
    "sqlite",
    "sqlite3",
    "st",
    "stack2",
    "stl",
    "sub",
    "suo",
    "swc",
    "swf",
    "tab",
    "tab_i",
    "tar",
    "tbz",
    "tbz2",
    "tga",
    "tgz",
    "thmx",
    "tif",
    "tiff",
    "tim",
    "tip",
    "tlog",
    "tlz",
    "tmd",
    "truststore",
    "ttc",
    "ttf",
    "tvd",
    "tvm",
    "tvx",
    "txz",
    "typedefs",
    "ucfg",
    "ucfgs",
    "udf",
    "uvh",
    "uvi",
    "uvm",
    "uvp",
    "uvs",
    "uvu",
    "viv",
    "vob",
    "war",
    "wasm",
    "wav",
    "wax",
    "wbmp",
    "wdp",
    "weba",
    "webm",
    "webp",
    "whl",
    "wim",
    "wm",
    "wma",
    "wmv",
    "wmx",
    "woff",
    "woff2",
    "wrm",
    "wvx",
    "xbm",
    "xcf",
    "xcuserstate",
    "xif",
    "xla",
    "xlam",
    "xls",
    "xlsb",
    "xlsm",
    "xlsx",
    "xlt",
    "xltm",
    "xltx",
    "xm",
    "xmind",
    "xpi",
    "xpm",
    "xwd",
    "xz",
    "z",
    "zip",
    "zipx",
    "zstd"));

  /** Built-in file-name suffixes (matched with {@link String#endsWith(String)}) that mark a file as binary. */
  private static final Set<String> DEFAULT_BINARY_SUFFIXES = Set.of("cacerts");

  /** Matches a non-empty string made only of hexadecimal digits. */
  private static final Pattern HEX_REGEX = Pattern.compile("\\p{XDigit}++");

  /** A hex file name must have a Shannon entropy strictly above this value to be treated as a digest. */
  private static final double MD5_AND_SHA_MIN_ENTROPY = 3.1;

  /** Per-instance binary extensions: the defaults plus any additions, lower-case, without leading dot. */
  private final Set<String> binaryFileExtensions;

  /** Per-instance binary file-name suffixes: the defaults plus any additions, matched case-sensitively. */
  private final Set<String> binaryFileSuffixes;

  /**
   * Creates a predicate using the default binary extensions and suffixes plus the given additions.
   *
   * <p>Each additional value is trimmed and ignored when empty. A value of the form {@code ".ext"}
   * (leading dot, at least one more character, no further dot) is registered as an extension and
   * therefore matched case-insensitively; any other value is registered as a plain file-name suffix
   * and matched case-sensitively.
   *
   * @param additionalBinarySuffixes extra extensions or suffixes to treat as binary; elements must not be null
   */
  public NotBinaryFilePredicate(String... additionalBinarySuffixes) {
    binaryFileExtensions = new HashSet<>(DEFAULT_BINARY_EXTENSIONS);
    binaryFileSuffixes = new HashSet<>(DEFAULT_BINARY_SUFFIXES);
    Set<String> cleanedSuffixes = Arrays.stream(additionalBinarySuffixes)
      .map(String::trim)
      .filter(value -> !value.isEmpty())
      .collect(Collectors.toSet());
    for (String suffix : cleanedSuffixes) {
      boolean isExtension = suffix.length() > 1 && suffix.startsWith(".") && suffix.indexOf('.', 1) == -1;
      if (isExtension) {
        // extension(String) lower-cases what it extracts, so stored extensions must be lower-case
        // too; otherwise a configured ".Foo" could never match any file.
        binaryFileExtensions.add(suffix.substring(1).toLowerCase(Locale.ROOT));
      } else {
        binaryFileSuffixes.add(suffix);
      }
    }
  }

  /**
   * Tells whether the given file should be kept, i.e. whether its name does not look binary.
   *
   * @param inputFile the file to test; only its {@code filename()} is used
   * @return {@code false} when the name has a binary extension, ends with a binary suffix, or looks
   *         like a hexadecimal digest; {@code true} otherwise
   */
  @Override
  public boolean apply(InputFile inputFile) {
    String filename = inputFile.filename();
    String extension = extension(filename);
    boolean hasBinaryExtension = extension != null && binaryFileExtensions.contains(extension);
    return !hasBinaryExtension &&
      binaryFileSuffixes.stream().noneMatch(filename::endsWith) &&
      !isMd5OrSha1(filename);
  }

  /**
   * Tells whether a file name looks like a hexadecimal digest.
   *
   * <p>Despite the method name, four lengths are accepted: 32 (MD5), 40 (SHA-1), 64 (SHA-256) and
   * 128 (SHA-512) characters. The whole name must consist of hexadecimal digits and its Shannon
   * entropy, as computed by {@code EntropyChecker.calculateShannonEntropy}, must be strictly greater
   * than {@code MD5_AND_SHA_MIN_ENTROPY}.
   *
   * @param filename the file name to inspect, without any directory part
   * @return {@code true} when the name matches all the conditions above
   */
  public boolean isMd5OrSha1(String filename) {
    int len = filename.length();
    return ( /* md5 */ len == 32 || /* sha1 */ len == 40 || /* sha256 */ len == 64 || /* sha512 */ len == 128) &&
      HEX_REGEX.matcher(filename).matches() && EntropyChecker.calculateShannonEntropy(filename) > MD5_AND_SHA_MIN_ENTROPY;
  }

  /**
   * Registers an additional binary file extension on this instance.
   *
   * <p>The extension is stored lower-cased because {@link #extension(String)} lower-cases the value
   * it is compared against. A {@code null} argument is ignored.
   *
   * @param extension the extension to add, without the leading dot
   */
  public void addBinaryFileExtension(String extension) {
    if (extension != null) {
      binaryFileExtensions.add(extension.toLowerCase(Locale.ROOT));
    }
  }

  /**
   * Extracts the lower-cased extension of a file name.
   *
   * @param filename the file name to inspect
   * @return the text after the last {@code '.'}, lower-cased with {@link Locale#ROOT}, or
   *         {@code null} when the name contains no dot or ends with a dot
   */
  @Nullable
  public static String extension(String filename) {
    int dotPos = filename.lastIndexOf('.');
    if (dotPos == -1 || dotPos == filename.length() - 1) {
      return null;
    }
    return filename.substring(dotPos + 1).toLowerCase(Locale.ROOT);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy