// com.metaeffekt.artifact.analysis.scancode.ScanCodeParser (Maven / Gradle / Ivy)
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.analysis.scancode;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import com.metaeffekt.artifact.terms.model.TermsMetaData;
import org.apache.commons.lang3.tuple.Pair;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.AntPathMatcher;
import org.yaml.snakeyaml.Yaml;
import java.io.File;
import java.io.IOException;
import java.util.*;
public class ScanCodeParser {
/** Logger shared by all parsing routines of this class. */
public static final Logger LOG = LoggerFactory.getLogger(ScanCodeParser.class);

/** Matcher used to evaluate include and exclude patterns against the paths of scanned files. */
private static final AntPathMatcher ANT_PATH_MATCHER = new AntPathMatcher();
/**
 * Converts a ScanCode license and rule data set into a per-license target layout.
 * <p>
 * For every license id found in {@code licenseSrcDir} the following files are produced below
 * {@code targetBaseDir/<first character of id>/<id>}:
 * <ul>
 *   <li>{@code license.meta.yaml} with the parsed {@link TermsMetaData},</li>
 *   <li>{@code variants/<rule name>.txt} for every rule of the license that is flagged as license text,</li>
 *   <li>{@code license/<id>.txt} with the text part of the license file.</li>
 * </ul>
 * License ids for which no meta data could be parsed are skipped.
 *
 * @param licenseSrcDir Directory containing the {@code *.LICENSE} files.
 * @param rulesSrcDir Directory containing the {@code *.RULE} files.
 * @param targetBaseDir Base directory below which the per-license folders are created.
 * @throws IOException If reading a source file or writing a target file fails.
 */
public void scanCodeToLicenseMetaData(File licenseSrcDir, File rulesSrcDir, File targetBaseDir) throws IOException {
    final List<String> licenseIds = parseLicenseIds(licenseSrcDir);

    // the rules directory is scanned once; the rules relevant for a license are filtered in the loop
    final String[] ruleYamlFiles = FileUtils.scanDirectoryForFiles(rulesSrcDir, "*.RULE");

    // a single parser instance is sufficient for all rule files
    final Yaml yaml = new Yaml();

    for (String licenseId : licenseIds) {
        final File licenseTargetDir = new File(new File(targetBaseDir, licenseId.substring(0, 1)), licenseId);

        // parse and write TermsMetaData
        final TermsMetaData termsMetaData = parseTermsMetaData(licenseId, licenseSrcDir, rulesSrcDir);
        if (termsMetaData == null) {
            // parseTermsMetaData has already logged the reason; there is nothing to write for this id
            continue;
        }
        final File targetFile = new File(licenseTargetDir, "license.meta.yaml");
        termsMetaData.writeToFile(targetFile);

        // copy variant license texts for this license id (we parse the rules a second time);
        // a rule belongs to the license if it is named <id>.RULE or <id>_<anything>.RULE (case-insensitive)
        final String baseRuleName = (licenseId + ".RULE").toLowerCase(Locale.ROOT);
        final String variantPrefix = (licenseId + "_").toLowerCase(Locale.ROOT);
        for (String ruleFile : ruleYamlFiles) {
            final String normalizedRuleName = ruleFile.toLowerCase(Locale.ROOT);
            final boolean isBaseRule = normalizedRuleName.equals(baseRuleName);
            final boolean isVariantRule = normalizedRuleName.startsWith(variantPrefix)
                    && normalizedRuleName.endsWith(".rule");
            if (!isBaseRule && !isVariantRule) {
                continue;
            }

            final File ruleYamlFile = new File(rulesSrcDir, ruleFile);
            final Pair<String, String> content = readScanCodeMixedFormat(ruleYamlFile);
            final ScanCodeRule scanCodeRule = yaml.loadAs(content.getLeft(), ScanCodeRule.class);
            final String rule = content.getRight();
            if (scanCodeRule.isIs_license_text()) {
                final File variantDst = new File(licenseTargetDir, "variants/" + ruleFile.replace(".RULE", ".txt"));
                FileUtils.write(variantDst, rule, FileUtils.ENCODING_UTF_8);
            }
        }

        // copy license
        final File licenseFile = new File(licenseSrcDir, licenseId + ".LICENSE");
        final File targetLicenseFile = new File(licenseTargetDir, "license/" + licenseId + ".txt");
        if (licenseFile.exists()) {
            final Pair<String, String> content = readScanCodeMixedFormat(licenseFile);
            FileUtils.writeStringToFile(targetLicenseFile, content.getRight().trim(), FileUtils.ENCODING_UTF_8);
        }

        LOG.info("Parsing scancode license {} completed.", licenseId);
    }
}
/**
 * Reads a ScanCode file that consists of a YAML block followed by free text and splits it into both parts.
 * <p>
 * The first occurrence of a {@code ---} line marker is removed; the content is then split at the next
 * line starting with {@code ---}.
 *
 * @param file The file to read (UTF-8).
 * @return Pair of the YAML part (left) and the trimmed text following the delimiter (right).
 * @throws IOException If the file cannot be read or does not contain the closing {@code ---} delimiter.
 */
private Pair<String, String> readScanCodeMixedFormat(File file) throws IOException {
    final String fileContent = FileUtils.readFileToString(file, FileUtils.ENCODING_UTF_8);

    // drop the opening marker so that the next marker found is the one closing the YAML block
    final String intermediate = fileContent.replaceFirst("---\\n", "");

    final String delimiter = "\n---";
    final int index = intermediate.indexOf(delimiter);
    if (index < 0) {
        // without this check substring(0, -1) fails with an exception that does not name the file
        throw new IOException("Cannot split file [" + file + "] into YAML and text part; delimiter '---' not found.");
    }

    final String yamlPart = intermediate.substring(0, index);
    final String content = intermediate.substring(index + delimiter.length()).trim();
    return Pair.of(yamlPart, content);
}
/**
 * Builds the {@link TermsMetaData} for a single ScanCode license.
 * <p>
 * The YAML part of {@code <licenseId>.LICENSE} provides the basic attributes. In addition, the text of
 * every rule named {@code <licenseId>.RULE} or {@code <licenseId>_*.RULE} whose
 * {@code is_license_reference} value starts with {@code yes} is added as an alternative name.
 *
 * @param licenseId The ScanCode license key.
 * @param licenseBaseDir Directory containing the {@code *.LICENSE} files.
 * @param rulesBaseDir Directory containing the {@code *.RULE} files.
 * @return The parsed meta data, or {@code null} if the license file does not exist.
 * @throws IOException If a license or rule file cannot be read.
 */
public TermsMetaData parseTermsMetaData(String licenseId, File licenseBaseDir, File rulesBaseDir) throws IOException {
    LOG.info("Parsing scancode license {}...", licenseId);

    final TermsMetaData termsMetaData = new TermsMetaData();
    final Yaml yaml = new Yaml();

    final File licenseYamlFile = new File(licenseBaseDir, licenseId + ".LICENSE");
    if (!licenseYamlFile.exists()) {
        LOG.warn("Inconsistency in scancode versions. The license file [{}] could not be parsed. The file may not exist.", licenseYamlFile);
        return null;
    }

    final Pair<String, String> licenseContent = readScanCodeMixedFormat(licenseYamlFile);
    final ScanCodeLicense scanCodeLicense = yaml.loadAs(licenseContent.getLeft(), ScanCodeLicense.class);

    termsMetaData.setCanonicalName(scanCodeLicense.getName());
    // NOTE(review): the category is set to the license name, not to scanCodeLicense.getCategory();
    // the ScanCode category is mapped into the classification below - confirm this is intended
    termsMetaData.setCategory(scanCodeLicense.getName());
    termsMetaData.setSpdxIdentifier(scanCodeLicense.getSpdx_license_key());
    termsMetaData.addOtherId("scancode", licenseId);
    termsMetaData.setShortName(licenseId);
    termsMetaData.setType(scanCodeLicense.isIs_exception() ? "exception" : null);
    termsMetaData.setUrl(scanCodeLicense.getHomepage_url());
    termsMetaData.setClassification(mapCategoryToClassification(scanCodeLicense.getCategory()));

    if (scanCodeLicense.getOsi_license_key() != null) {
        termsMetaData.addOtherId("osi", scanCodeLicense.getOsi_license_key());
    }

    // add the rules; both the base rule (<id>.RULE) and the numbered/named variants (<id>_*.RULE)
    final String[] ruleYamlFiles = FileUtils.scanDirectoryForFiles(rulesBaseDir, licenseId + ".RULE", licenseId + "_*.RULE");
    for (String ruleFile : ruleYamlFiles) {
        final File ruleYamlFile = new File(rulesBaseDir, ruleFile);
        final Pair<String, String> ruleContent = readScanCodeMixedFormat(ruleYamlFile);
        final ScanCodeRule scanCodeRule = yaml.loadAs(ruleContent.getLeft(), ScanCodeRule.class);

        final String isLicenseReference = scanCodeRule.getIs_license_reference();
        if (isLicenseReference != null && isLicenseReference.startsWith("yes")) {
            // collapse the reference text to a single line and escape double quotes
            String ref = ruleContent.getRight().replace("\n", " ").trim();
            ref = ref.replace("\"", "\\\"");
            termsMetaData.getAlternativeNames().add(ref);
        }
    }

    termsMetaData.consolidateAlternativeNames();

    return termsMetaData;
}
/**
 * Translates a ScanCode license category into the classification term used in the terms meta data.
 * The comparison ignores case. Categories without a mapping are logged and returned unchanged.
 *
 * @param category The ScanCode category; may be {@code null}.
 * @return The mapped classification, or the given category if no mapping exists.
 */
private String mapCategoryToClassification(String category) {
    // each entry: { ScanCode category, classification }
    final String[][] categoryMappings = {
            {"permissive", "permissive"},
            {"copyleft", "copyleft"},
            {"copyleft limited", "limited copyleft"},
            {"Proprietary Free", "proprietary free"},
            {"commercial", "commercial"},
            {"Free Restricted", "restricted free"},
            {"Patent License", "patent license"},
            {"Public Domain", "public domain"},
            {"Source-available", "source-available"},
    };

    for (String[] mapping : categoryMappings) {
        if (mapping[0].equalsIgnoreCase(category)) {
            return mapping[1];
        }
    }

    LOG.warn("Cannot map unknown category: {}", category);
    return category;
}
/**
 * Derives the license ids from the {@code *.LICENSE} files found below the given directory
 * (subdirectories included).
 *
 * @param licenseSrcDir Directory to scan for {@code *.LICENSE} files.
 * @return The scan results with the {@code .LICENSE} suffix removed; presumably paths relative to
 *         {@code licenseSrcDir} (depends on {@code FileUtils.scanDirectoryForFiles} - TODO confirm).
 */
protected List<String> parseLicenseIds(File licenseSrcDir) {
    final String[] licenseYamlFiles = FileUtils.scanDirectoryForFiles(licenseSrcDir, "**/*.LICENSE");
    final List<String> licenseIds = new ArrayList<>(licenseYamlFiles.length);
    for (String licenseYamlFile : licenseYamlFiles) {
        licenseIds.add(licenseYamlFile.substring(0, licenseYamlFile.lastIndexOf(".LICENSE")));
    }
    return licenseIds;
}
/**
 * Parses the outputs from scancode.
 * <p>
 * Only files whose path matches at least one include pattern and none of the exclude patterns
 * contribute to the result sets.
 *
 * @param result String with output content.
 * @param licenseList The list of licenses.
 * @param copyrightList The list of copyrights.
 * @param authorList The list of authors.
 * @param includes The strings to include.
 * @param excludes The strings to exclude.
 * @throws JSONException If the result JSON string is invalid.
 */
public static void parseScanCodeResult(final String result, TreeSet<String> licenseList, TreeSet<String> copyrightList, TreeSet<String> authorList, String[] includes, String[] excludes) throws JSONException {
    // NOTE:
    // - the scan code result is filtered using includes and excludes patterns
    // - per default the analyze.metascan.license.includes and analyze.metascan.license.excludes are used to
    //   conform with the settings of Metascan.
    final JSONObject obj = new JSONObject(result);
    final JSONArray files = obj.getJSONArray("files");
    for (int i = 0; i < files.length(); i++) {
        final JSONObject file = files.getJSONObject(i);
        final String path = file.getString("path");

        // skip if not included
        if (!matchesAnyPattern(includes, path)) {
            continue;
        }

        // skip if excluded; every exclude pattern is evaluated, not only the first one
        if (matchesAnyPattern(excludes, path)) {
            continue;
        }

        licenseList.addAll(parseLicenses(file));

        final JSONArray copyrights = file.getJSONArray("copyrights");
        addValuesToList(copyrights, copyrightList, "copyright");

        final JSONArray authors = file.getJSONArray("authors");
        addValuesToList(authors, authorList, "author");
    }
}

/**
 * Checks whether the path matches at least one of the given Ant-style patterns.
 *
 * @param patterns The patterns to evaluate.
 * @param path The path to test.
 * @return {@code true} as soon as one pattern matches; {@code false} if none matches.
 */
private static boolean matchesAnyPattern(String[] patterns, String path) {
    for (String pattern : patterns) {
        if (ANT_PATH_MATCHER.match(pattern, path)) {
            return true;
        }
    }
    return false;
}
/**
 * Extracts the license keys reported by scancode for a single file entry.
 * <p>
 * Two result layouts are supported: entries with a {@code licenses} array (pre version 32, key in
 * {@code key}) and entries with a {@code license_detections} array (expression in
 * {@code license_expression}). Every value is split at {@code " AND "} and the parts are collected.
 *
 * @param file The JSON object of one scanned file.
 * @return The collected license keys in order of appearance; duplicates are not removed.
 */
private static List<String> parseLicenses(JSONObject file) {
    // pre 32 versions report "licenses"; later versions report "license_detections"
    final boolean legacyFormat = file.has("licenses");
    final JSONArray entries = file.getJSONArray(legacyFormat ? "licenses" : "license_detections");
    final String expressionKey = legacyFormat ? "key" : "license_expression";

    final List<String> aggregatedLicenseExpressions = new ArrayList<>();
    for (int i = 0; i < entries.length(); i++) {
        final String expression = entries.getJSONObject(i).getString(expressionKey);
        // for the legacy format the split is not expected to be necessary; it is applied anyway
        aggregatedLicenseExpressions.addAll(Arrays.asList(expression.split(" AND ")));
    }
    return aggregatedLicenseExpressions;
}
/**
 * Adds the non-empty string values stored under {@code key} in the objects of the given array to the set.
 *
 * @param copyrights The JSON array of objects to read from (copyright or author entries).
 * @param copyrightList The set receiving the values.
 * @param key The field name to read from each object.
 * @throws JSONException If an array element is not a JSON object or the field is not a string.
 */
private static void addValuesToList(JSONArray copyrights, TreeSet<String> copyrightList, String key) throws JSONException {
    for (int j = 0; j < copyrights.length(); j++) {
        final JSONObject entry = copyrights.getJSONObject(j);
        final String value = getFieldStringValue(entry, key);
        if (!StringUtils.isEmpty(value)) {
            copyrightList.add(value);
        }
    }
}
public void parseScanCodeResult(String result, Map> licenseFileMap, Map> copyrightFileMap, Map> authorFileMap) throws JSONException {
JSONObject obj = new JSONObject(result);
JSONArray files = obj.getJSONArray("files");
for (int i = 0; i < files.length(); i++) {
JSONObject file = files.getJSONObject(i);
final JSONArray copyrights = file.getJSONArray("copyrights");
final String path = file.getString("path");
for (int j = 0; j < copyrights.length(); j++) {
JSONObject copyright = copyrights.getJSONObject(j);
String value = getFieldStringValue(copyright, "copyright");
List list = copyrightFileMap.get(value);
if (list == null) {
list = new ArrayList<>();
copyrightFileMap.put(value, list);
}
if (!list.contains(path)) {
list.add(path);
}
}
JSONArray authors = file.getJSONArray("authors");
for (int j = 0; j < authors.length(); j++) {
JSONObject author = authors.getJSONObject(j);
String value = getFieldStringValue(author, "author");
List list = authorFileMap.get(value);
if (list == null) {
list = new ArrayList<>();
authorFileMap.put(value, list);
}
if (!list.contains(path)) {
list.add(path);
}
}
List licenses = parseLicenses(file);
for (String license : licenses) {
final List list = licenseFileMap.computeIfAbsent(license, k -> new ArrayList<>());
if (!list.contains(path)) {
list.add(path);
}
}
}
}
/**
 * Reads a mandatory string field from a JSON object.
 *
 * @param object The JSON object to read from.
 * @param key The name of the field.
 * @return The string value of the field.
 * @throws JSONException If the field exists but is not a string.
 * @throws IllegalStateException If the field does not exist (old result format; no longer supported).
 */
private static String getFieldStringValue(JSONObject object, String key) throws JSONException {
    if (!object.has(key)) {
        // old format; no longer supported
        throw new IllegalStateException("Field [" + key + "] not found in scancode result entry; the old result format is no longer supported.");
    }
    return object.getString(key);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy