All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.gs.obevo.impl.util.MultiLineStringSplitter Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2017 Goldman Sachs.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.gs.obevo.impl.util;

import java.util.regex.Pattern;

import com.gs.obevo.db.sqlparser.tokenparser.SqlToken;
import com.gs.obevo.db.sqlparser.tokenparser.SqlTokenParser;
import com.gs.obevo.db.sqlparser.tokenparser.SqlTokenType;
import org.apache.commons.lang3.StringUtils;
import org.eclipse.collections.api.block.function.Function;
import org.eclipse.collections.api.list.MutableList;
import org.eclipse.collections.impl.factory.Lists;

public class MultiLineStringSplitter implements Function> {
    public static MultiLineStringSplitter createSplitterOnSpaceAndLine(String token) {
        return new MultiLineStringSplitter("[^\\S\\n]*" + token + "[^\\S\\n]*");
    }

    private final String splitToken;
    private final boolean splitOnWholeLine;

    public MultiLineStringSplitter(String splitToken) {
        this(splitToken, true);
    }

    public MultiLineStringSplitter(String splitToken, boolean splitOnWholeLine) {
        this.splitToken = splitToken;
        this.splitOnWholeLine = splitOnWholeLine;
    }

    private final SqlTokenParser tokenParser = new SqlTokenParser();

    private MutableList collapseWhiteSpaceAndTokens(MutableList sqlTokens) {
        MutableList collapsedTokens = Lists.mutable.empty();

        String curText = "";
        boolean inText = false;
        for (SqlToken sqlToken : sqlTokens) {
            switch (sqlToken.getTokenType()) {
            case STRING:
            case COMMENT:
                if (inText) {
                    collapsedTokens.add(new SqlToken(SqlTokenType.TOKEN, curText));
                    curText = "";
                }
                inText = false;
                collapsedTokens.add(sqlToken);
                break;
            case TOKEN:
            case WHITESPACE:
                inText = true;
                curText += sqlToken.getText();
                break;
            default:
                throw new IllegalArgumentException("Not expecting this enum type: " + sqlToken.getTokenType());
            }
        }
        if (inText) {
            collapsedTokens.add(new SqlToken(SqlTokenType.TOKEN, curText));
        }

        return collapsedTokens;
    }

    @Override
    public MutableList valueOf(String inputString) {
        inputString += "\n";  // add sentinel to facilitate line split

        MutableList sqlTokens = this.tokenParser.parseTokens(inputString);
        sqlTokens = this.collapseWhiteSpaceAndTokens(sqlTokens);

        MutableList finalSplitStrings = Lists.mutable.empty();
        String currentSql = "";

        for (SqlToken sqlToken : sqlTokens) {
            if (sqlToken.getTokenType() == SqlTokenType.COMMENT || sqlToken.getTokenType() == SqlTokenType.STRING) {
                currentSql += sqlToken.getText();
            } else {
                String pattern = splitOnWholeLine ? "(?i)^" + this.splitToken + "$" : this.splitToken;
                MutableList splitStrings =
                        Lists.mutable.with(Pattern.compile(pattern, Pattern.MULTILINE).split(sqlToken.getText()));
                if (splitStrings.isEmpty()) {
                    // means that we exactly match
                    finalSplitStrings.add(currentSql);
                    currentSql = "";
                } else if (splitStrings.size() == 1) {
                    currentSql += sqlToken.getText();
                } else {
                    splitStrings.set(0, currentSql + splitStrings.get(0));

                    if (splitOnWholeLine) {
                        if (splitStrings.size() > 1) {
                            splitStrings.set(0, StringUtils.chomp(splitStrings.get(0)));
                            for (int i = 1; i < splitStrings.size(); i++) {
                                String newSql = splitStrings.get(i);
                                if (newSql.startsWith("\n")) {
                                    newSql = newSql.replaceFirst("^\n", "");
                                } else if (newSql.startsWith("\r\n")) {
                                    newSql = newSql.replaceFirst("^\r\n", "");
                                }

                                // Chomping the end of each sql due to the split of the GO statement
                                if (i < splitStrings.size() - 1) {
                                    newSql = StringUtils.chomp(newSql);
                                }
                                splitStrings.set(i, newSql);
                            }
                        }
                    }

                    finalSplitStrings.addAll(splitStrings.subList(0, splitStrings.size() - 1));
                    currentSql = splitStrings.getLast();
                }
            }
        }

        if (!currentSql.isEmpty()) {
            finalSplitStrings.add(currentSql);
        }

        // accounting for the sentinel
        if (finalSplitStrings.getLast().isEmpty()) {
            finalSplitStrings.remove(finalSplitStrings.size() - 1);
        } else {
            finalSplitStrings.set(finalSplitStrings.size() - 1, StringUtils.chomp(finalSplitStrings.getLast()));
        }

        return finalSplitStrings;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy