All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.artifact.terms.model.MatchPreprocessor Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.terms.model;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.regex.Pattern;

/**
 * Processes TMD license text for later application of mappings and segmentation.
 * <p>
 * The text handled by this class contains literal marker tokens ({@code NEWLINE-MARKER},
 * {@code EMPTY-LINE-MARKER}, {@code COLON-MARKER}, {@code FILE-SEGMENT-MARKER} and
 * {@code LICENSE-SEGMENT-MARKER}). This class either replaces those tokens or uses them
 * as split points.
 */
public class MatchPreprocessor {

    private static final Logger LOG = LoggerFactory.getLogger(MatchPreprocessor.class);

    private static final Pattern PATTERN_NEWLINE_MARKER = Pattern.compile("NEWLINE-MARKER");
    private static final Pattern PATTERN_EMPTY_LINE_MARKER = Pattern.compile("EMPTY-LINE-MARKER");
    private static final Pattern PATTERN_COLON_MARKER = Pattern.compile("COLON-MARKER");
    // Despite its name this pattern matches runs of whitespace characters, not a marker token.
    private static final Pattern PATTERN_WHITESPACE_MARKER = Pattern.compile("\\s+");
    private static final Pattern PATTERN_FILE_SEGMENT_MARKER = Pattern.compile("FILE-SEGMENT-MARKER");
    private static final Pattern PATTERN_LICENSES_SEGMENT_MARKER = Pattern.compile("LICENSE-SEGMENT-MARKER");

    private final NormalizationMetaData normalizationMetaData;

    /**
     * Creates a preprocessor that carries the given normalization metadata.
     *
     * @param normalizationMetaData the metadata instance returned by {@link #getNormalizationMetaData()}
     */
    public MatchPreprocessor(NormalizationMetaData normalizationMetaData) {
        this.normalizationMetaData = normalizationMetaData;
    }

    /**
     * Replaces the inline markers of a license text and collapses whitespace.
     * <p>
     * {@code NEWLINE-MARKER} becomes a single space, {@code EMPTY-LINE-MARKER} is removed,
     * {@code COLON-MARKER} becomes {@code ":"}, and finally every run of whitespace is collapsed
     * into one space. The result therefore never contains a line break.
     *
     * @param text TMD license text; must not be {@code null}
     *
     * @return TMD license text with replaced markers
     */
    public String removeMarkers(String text) {
        text = PATTERN_NEWLINE_MARKER.matcher(text).replaceAll(" ");
        text = PATTERN_EMPTY_LINE_MARKER.matcher(text).replaceAll("");
        text = PATTERN_COLON_MARKER.matcher(text).replaceAll(":");
        text = PATTERN_WHITESPACE_MARKER.matcher(text).replaceAll(" ");
        return text;
    }

    /**
     * Replaces the inline markers of a license text while keeping the line structure readable.
     * <p>
     * {@code EMPTY-LINE-MARKER} is removed, {@code COLON-MARKER} becomes {@code ":"}, every run of
     * whitespace is collapsed into one space and each {@code NEWLINE-MARKER} becomes a line break.
     *
     * @param text TMD license text; must not be {@code null}
     *
     * @return TMD license text with replaced markers and one line break per newline marker
     */
    public String removeMarkersForReadability(String text) {
        text = PATTERN_EMPTY_LINE_MARKER.matcher(text).replaceAll("");
        text = PATTERN_COLON_MARKER.matcher(text).replaceAll(":");
        text = PATTERN_WHITESPACE_MARKER.matcher(text).replaceAll(" ");
        // The line breaks must be inserted after the whitespace collapse: "\\s+" also matches
        // "\n", so inserting them earlier would turn every line break back into a space.
        text = PATTERN_NEWLINE_MARKER.matcher(text).replaceAll("\n");
        return text;
    }

    /**
     * Splits a given license text into multiple segments based on the placed segment markers.
     * <p>
     * The text is first split at every {@code FILE-SEGMENT-MARKER}; each resulting part is then
     * split at every {@code LICENSE-SEGMENT-MARKER}. All parts are returned in their original
     * order as one flat list. Splitting follows {@link Pattern#split(CharSequence)}, so trailing
     * empty parts of each split are not included.
     *
     * @param fileContent TMD license text; must not be {@code null}
     *
     * @return list of file segments
     */
    public List<String> applySegments(String fileContent) {
        final List<String> licenseContextList = new ArrayList<>();
        for (String fileContext : PATTERN_FILE_SEGMENT_MARKER.split(fileContent)) {
            Collections.addAll(licenseContextList, PATTERN_LICENSES_SEGMENT_MARKER.split(fileContext));
        }
        return licenseContextList;
    }

    /**
     * Returns the normalization metadata this preprocessor was created with.
     *
     * @return the instance passed to the constructor
     */
    public NormalizationMetaData getNormalizationMetaData() {
        return normalizationMetaData;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy