// com.metaeffekt.artifact.terms.model.MatchPreprocessor Maven / Gradle / Ivy
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.artifact.terms.model;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Processes TMD license text for later application of mappings and segmentation.
 * <p>
 * The text handled here carries literal marker tokens ({@code NEWLINE-MARKER},
 * {@code EMPTY-LINE-MARKER}, {@code COLON-MARKER}, {@code FILE-SEGMENT-MARKER},
 * {@code LICENSE-SEGMENT-MARKER}). This class either replaces those tokens or splits the
 * text at them.
 */
public class MatchPreprocessor {

    private static final Logger LOG = LoggerFactory.getLogger(MatchPreprocessor.class);

    private static final Pattern PATTERN_NEWLINE_MARKER = Pattern.compile("NEWLINE-MARKER");
    private static final Pattern PATTERN_EMPTY_LINE_MARKER = Pattern.compile("EMPTY-LINE-MARKER");
    private static final Pattern PATTERN_COLON_MARKER = Pattern.compile("COLON-MARKER");

    /** Any run of whitespace characters, including line breaks. */
    private static final Pattern PATTERN_WHITESPACE_MARKER = Pattern.compile("\\s+");

    /** Two or more consecutive space characters; deliberately does not match line breaks. */
    private static final Pattern PATTERN_SPACE_RUN = Pattern.compile(" {2,}");

    private static final Pattern PATTERN_FILE_SEGMENT_MARKER = Pattern.compile("FILE-SEGMENT-MARKER");
    private static final Pattern PATTERN_LICENSES_SEGMENT_MARKER = Pattern.compile("LICENSE-SEGMENT-MARKER");

    private final NormalizationMetaData normalizationMetaData;

    /**
     * Creates a preprocessor that holds the given normalization metadata.
     *
     * @param normalizationMetaData metadata instance exposed via {@link #getNormalizationMetaData()}
     */
    public MatchPreprocessor(NormalizationMetaData normalizationMetaData) {
        this.normalizationMetaData = normalizationMetaData;
    }

    /**
     * Replaces the license text markers and flattens the text onto a single line.
     * <p>
     * {@code NEWLINE-MARKER} becomes a space, {@code EMPTY-LINE-MARKER} is removed,
     * {@code COLON-MARKER} becomes {@code ":"}, and finally every run of whitespace
     * (line breaks included) is collapsed to one space.
     *
     * @param text TMD license text
     *
     * @return TMD license text with replaced markers
     *
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public String removeMarkers(String text) {
        text = PATTERN_NEWLINE_MARKER.matcher(text).replaceAll(" ");
        text = PATTERN_EMPTY_LINE_MARKER.matcher(text).replaceAll("");
        text = PATTERN_COLON_MARKER.matcher(text).replaceAll(":");
        text = PATTERN_WHITESPACE_MARKER.matcher(text).replaceAll(" ");
        return text;
    }

    /**
     * Replaces the license text markers while keeping a line break at every
     * {@code NEWLINE-MARKER}.
     * <p>
     * {@code EMPTY-LINE-MARKER} is removed and {@code COLON-MARKER} becomes {@code ":"}.
     * Whitespace already present in the input is collapsed to single spaces.
     *
     * @param text TMD license text
     *
     * @return TMD license text with replaced markers, one line per {@code NEWLINE-MARKER}
     *
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public String removeMarkersForReadability(String text) {
        // Normalize pre-existing whitespace BEFORE inserting line breaks: "\\s+" also matches
        // '\n', so running it after the marker replacement would erase the breaks again.
        text = PATTERN_WHITESPACE_MARKER.matcher(text).replaceAll(" ");
        text = PATTERN_NEWLINE_MARKER.matcher(text).replaceAll("\n");
        text = PATTERN_EMPTY_LINE_MARKER.matcher(text).replaceAll("");
        text = PATTERN_COLON_MARKER.matcher(text).replaceAll(":");
        // Removing markers can leave adjacent spaces behind; squeeze those without touching '\n'.
        text = PATTERN_SPACE_RUN.matcher(text).replaceAll(" ");
        return text;
    }

    /**
     * Splits a given license text into multiple segments based on the placed segment markers.
     * <p>
     * The text is first split at every {@code FILE-SEGMENT-MARKER}; each resulting part is then
     * split at every {@code LICENSE-SEGMENT-MARKER}. All pieces are returned in order of
     * appearance in one flat list.
     *
     * @param fileContent TMD license text
     *
     * @return list of file segments
     *
     * @throws NullPointerException if {@code fileContent} is {@code null}
     */
    public List<String> applySegments(String fileContent) {
        final List<String> licenseContextList = new ArrayList<>();
        final String[] fileContexts = PATTERN_FILE_SEGMENT_MARKER.split(fileContent);
        for (String fileContext : fileContexts) {
            final String[] localLicenseContext = PATTERN_LICENSES_SEGMENT_MARKER.split(fileContext);
            licenseContextList.addAll(Arrays.asList(localLicenseContext));
        }
        return licenseContextList;
    }

    /**
     * Returns the normalization metadata passed to the constructor.
     *
     * @return the normalization metadata instance held by this preprocessor
     */
    public NormalizationMetaData getNormalizationMetaData() {
        return normalizationMetaData;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy