All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.javaparser.LineEndingProcessingProvider Maven / Gradle / Ivy

/*
 * Copyright (C) 2007-2010 Júlio Vilmar Gesser.
 * Copyright (C) 2011, 2013-2023 The JavaParser Team.
 *
 * This file is part of JavaParser.
 *
 * JavaParser can be used either under the terms of
 * a) the GNU Lesser General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * b) the terms of the Apache License
 *
 * You should have received a copy of both licenses in LICENCE.LGPL and
 * LICENCE.APACHE. Please refer to those files for details.
 *
 * JavaParser is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 */
package com.github.javaparser;

import com.github.javaparser.utils.LineSeparator;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

/**
 * {@link Provider} that forwards the characters of a wrapped provider unchanged while counting the
 * line separators it sees on the way through.
 *
 * <p>Every character recognised by {@link LineSeparator#lookup(String)} increments the counter of the
 * matching separator. In addition, an LF that immediately follows a CR increments the CRLF counter
 * (the individual CR and LF counters are incremented as well). The counters are then handed to
 * {@link LineSeparator#getLineEnding(int, int, int)} by {@link #getDetectedLineEnding()}.
 *
 * <p>Instances keep mutable buffer and counter state without any synchronisation.
 */
public class LineEndingProcessingProvider implements Provider {

    private static final int EOF = -1;

    private static final int DEFAULT_BUFFER_SIZE = 2048;

    /**
     * The "other" provider which we are wrapping around / reading from.
     */
    private final Provider _input;

    /**
     * The buffer that we're storing data within.
     */
    private final char[] _data;

    /**
     * The number of valid characters in {@link #_data}.
     */
    private int _len = 0;

    /**
     * The position in {@link #_data} where to read the next source character from.
     */
    private int _pos = 0;

    /**
     * Number of occurrences seen so far, per line separator.
     */
    private final Map<LineSeparator, Integer> eolCounts = new HashMap<>();

    /**
     * The line separator matched by the most recently processed character, or {@code null} if that
     * character was not a line separator (or nothing has been processed yet). Kept as instance state
     * so that a CR/LF pair split across two {@link #read(char[], int, int)} calls is still recognised.
     */
    private LineSeparator previousLineSeparator = null;

    /**
     * Creates a provider reading from {@code input} using the default internal buffer size.
     *
     * @param input the provider to read from
     */
    public LineEndingProcessingProvider(Provider input) {
        this(DEFAULT_BUFFER_SIZE, input);
    }

    /**
     * Creates a provider reading from {@code input} using an internal buffer of the given size.
     *
     * @param bufferSize the number of characters to request from {@code input} at a time; must be positive
     * @param input the provider to read from
     * @throws IllegalArgumentException if {@code bufferSize} is not positive
     */
    public LineEndingProcessingProvider(int bufferSize, Provider input) {
        // A zero-length buffer could never receive any character, so reading would never make progress.
        if (bufferSize <= 0) {
            throw new IllegalArgumentException("bufferSize must be positive, got " + bufferSize);
        }
        _input = input;
        _data = new char[bufferSize];
    }

    /**
     * Closes the wrapped provider.
     *
     * @throws IOException if closing the wrapped provider fails
     */
    @Override
    public void close() throws IOException {
        _input.close();
    }

    /**
     * Refills {@link #_data} from the wrapped provider and rewinds the read position.
     *
     * @return whatever the wrapped provider's {@code read} returned (a negative value is treated as
     *     end of input by the caller)
     */
    private int fillBuffer() throws IOException {
        _pos = 0;
        int direct = _input.read(_data, 0, _data.length);
        // Always reset the valid length: the position was rewound above, so keeping an old length
        // after a zero-length (or end-of-input) read would expose already-consumed characters again.
        _len = Math.max(direct, 0);
        return direct;
    }

    /**
     * Asks {@link LineSeparator#getLineEnding(int, int, int)} for the line ending that corresponds to
     * the CR, LF and CRLF counts gathered from the characters read so far.
     *
     * @return the line separator reported for the current counts
     */
    public LineSeparator getDetectedLineEnding() {
        return LineSeparator.getLineEnding(
                eolCounts.getOrDefault(LineSeparator.CR, 0),
                eolCounts.getOrDefault(LineSeparator.LF, 0),
                eolCounts.getOrDefault(LineSeparator.CRLF, 0));
    }

    private boolean isBufferEmpty() {
        return _pos >= _len;
    }

    /**
     * Retrieves the next character from the buffered {@link #_input}, refilling the buffer as needed.
     *
     * @return The next character or {@code -1} if no more input is available.
     */
    private int nextBufferedChar() throws IOException {
        // Loop rather than a single refill: a refill that yields zero characters leaves the buffer
        // empty and is simply retried.
        while (isBufferEmpty()) {
            int direct = fillBuffer();
            if (direct < 0) {
                return EOF;
            }
        }
        return _data[_pos++];
    }

    /**
     * Copies up to {@code len} characters from the wrapped provider into {@code buffer}, updating the
     * line separator counters for every character copied.
     *
     * @param buffer the destination array
     * @param offset the index in {@code buffer} at which to store the first character
     * @param len the maximum number of characters to copy
     * @return the number of characters copied, or {@code -1} if the input was already exhausted and
     *     nothing could be copied
     * @throws IOException if reading from the wrapped provider fails
     */
    @Override
    public int read(char[] buffer, final int offset, int len) throws IOException {
        int pos = offset;
        int stop = offset + len;
        while (pos < stop) {
            int ch = nextBufferedChar();
            if (ch < 0) {
                if (pos == offset) {
                    // Nothing read yet, this is the end of the stream.
                    return EOF;
                }
                break;
            }
            recordLineSeparator((char) ch);
            buffer[pos++] = (char) ch;
        }
        return pos - offset;
    }

    /**
     * Updates the line separator counters and {@link #previousLineSeparator} for one input character.
     */
    private void recordLineSeparator(char ch) {
        Optional<LineSeparator> lookup = LineSeparator.lookup(String.valueOf(ch));
        if (!lookup.isPresent()) {
            // Not a line separator: a following LF must not be paired with an earlier CR.
            previousLineSeparator = null;
            return;
        }
        LineSeparator lineSeparator = lookup.get();
        // Track the number of times this character is found.
        eolCounts.merge(lineSeparator, 1, Integer::sum);
        // Handle line separators of length two (specifically CRLF)
        // TODO: Make this more generic than just CRLF (e.g. track the previous char rather than the previous line separator)
        if (lineSeparator == LineSeparator.LF && previousLineSeparator == LineSeparator.CR) {
            eolCounts.merge(LineSeparator.CRLF, 1, Integer::sum);
        }
        // Remember this separator so the next character can be checked against it.
        previousLineSeparator = lineSeparator;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy