org.jruby.ext.ripper.LexerSource Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jruby-complete
There is a newer version: 9.4.12.0
/*
 ***** BEGIN LICENSE BLOCK *****
 * Version: EPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2004-2006 Thomas E Enebo 
 * Copyright (C) 2004 Jan Arne Petersen 
 * Copyright (C) 2004 Stefan Matthias Aust 
 * Copyright (C) 2005 Zach Dennis 
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
package org.jruby.ext.ripper;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.jcodings.Encoding;

import org.jruby.util.ByteList;

/**
 * This class is what feeds the lexer.  It is primarily a wrapper around a
 * Reader that can unread() data back onto the source.  Originally, I thought
 * about using the PushBackReader to handle read/unread, but I realized that
 * some extremely pathological case could overflow the pushback buffer.  Better
 * safe than sorry.  I could have combined this implementation with a 
 * PushbackBuffer, but the added complexity did not seem worth it.
 * 
 */
public class LexerSource {
	// Last position we gave out
    private Position lastPosition;
	
    // The name of this source (e.g. a filename: foo.rb)
    private final String sourceName;
    
    // Number of newlines read from the reader
    protected int line = 0;
    
    // Virtual line as specified by eval, etc...
    protected int lineOffset = 0;
    
    // How many bytes into the source are we?
    protected int offset = 0;
    
    // For 'list' and only populated if list is not null.
    private StringBuilder lineBuffer;

    // Last full line read.
    private StringBuilder sourceLine;
    
    private static final int INITIAL_PUSHBACK_SIZE = 100;
    
    // Where we get our newest char's
    private final InputStream in;
    
    // Our readback/pushback buffer.
    private char buf[] = new char[INITIAL_PUSHBACK_SIZE];
    
    // index of last character in pushback buffer
    private int bufLength = -1;
    
    // Character read before previous read
    private int oneAgo = '\n';
    private int twoAgo = 0;    

    /**
     * Create our food-source for the lexer
     * 
     * @param sourceName is the file we are reading
     * @param reader is what represents the contents of file sourceName
     * @param line starting line number for source (used by eval)
     * @param extraPositionInformation will gives us extra information that an IDE may want (deprecated)
     */
    protected LexerSource(String sourceName, InputStream in, int lineOffset) {
        this.in = in;        
        this.sourceName = sourceName;
        this.lineOffset = lineOffset;
        lastPosition = new Position(sourceName, line, line, offset, offset);
        lineBuffer = new StringBuilder(160);
        sourceLine = new StringBuilder(160);
    }

    /**
     * What file are we lexing?
     * @return the files name
     */
    public String getFilename() {
    	return sourceName;
    }
    
    /**
     * What line are we at?
     * @return the line number 0...line_size-1
     */
    public int getLine() {
        return line;
    }
    
    public int getVirtualLine() {
        return line + lineOffset;
    }
    
    /**
     * The location of the last byte we read from the source.
     * 
     * @return current location of source
     */
    public int getOffset() {
        return (offset <= 0 ? 0 : offset);
    }
    
    /**
     * Where is the reader within the source {filename,row}
     * 
     * @return the current position
     */
    public Position getPosition() {
        return new Position(getFilename(), lastPosition.getEndLine(),
                    getLine(), lastPosition.getEndOffset(), getOffset());
    }
    
    /**
     * Where is the reader within the source {filename,row}
     * 
     * @return the current position
     */
    public Position getPosition(Position startPosition, boolean inclusive) {
        if (startPosition == null) {
            lastPosition = new Position(getFilename(), lastPosition.getEndLine(),
                    getLine(), lastPosition.getEndOffset(), getOffset());
        } else if (inclusive) {
            lastPosition = new Position(getFilename(), startPosition.getStartLine(),
                    getLine(), startPosition.getStartOffset(), getOffset());
        } else {
            lastPosition = new Position(getFilename(), startPosition.getEndLine(),
                    getLine(), startPosition.getEndOffset(), getOffset());
        }

        return lastPosition;
    }

    private void captureFeatureNewline() {
        StringBuilder temp = sourceLine;
        // Save sourceLine for error reporting to display line where error occurred
        sourceLine = lineBuffer;

        temp.setLength(0);
        lineBuffer = temp;
    }

    protected void captureFeature(int c) {
        switch(c) {
            case '\n':
                lineBuffer.append((char) c);
            case -1:
                captureFeatureNewline();
                break;
            default:
                lineBuffer.append((char) c);
                break;
        }
    }

    public String getCurrentLine() {
        int errorLocation = lineBuffer.length() - 1;

        // Get rest of line. lineBuffer filled as side-effect...
        try { readLineBytes(); } catch (IOException e) {}


        return sourceLine.toString() + makePointer(errorLocation);
    }

    protected String makePointer(int length) {
        StringBuilder buffer = new StringBuilder();

        for (int i = 0; i < length; i++) {
            buffer.append(' ');
        }
        buffer.append('^');

        return buffer.toString();
    }

    // Super slow codepoint reader when we detect non-asci chars
    public int readCodepoint(int first, Encoding encoding) throws IOException {
        int count;
        byte[] value = new byte[6];

        // We know this will never be EOF
        value[0] = (byte) first;

        for (count = 1; count < 6; count++) {
            int c = read();
            if (c == RipperLexer.EOF) break; // Maybe we have enough bytes read to mbc at EOF.
            value[count] = (byte) c;
        }

        int length = encoding.length(value, 0, count);
        if (length < 0) {
            return -2; // TODO: Hack
        }

        int codepoint = encoding.mbcToCode(value, 0, length);
        for (int i = count - 1; i >= length; i--) {
            unread(value[i]);
        }

        return codepoint;
    }

    
    /**
     * Read next character from this source
     * 
     * @return next character to viewed by the source
     */
    public int read() throws IOException {
        int c;
        
        if (bufLength >= 0) {
            c = buf[bufLength--];
        } else {
            c = wrappedRead();
            
            if (c == -1) return RipperLexer.EOF;
        }
        
        advance(c);
        
        if (c == '\n') line++;
            
        return c; 
    }

    /**
     * Pushes char back onto this source.  Note, this also
     * allows us to push whatever is passes back into the source.
     * 
     * @param  to be put back onto the source
     */
    public void unread(int c) {
        if (c == RipperLexer.EOF) return;
        
        retreat();
            
        if (c == '\n') line--;

        buf[++bufLength] = (char) c;
        
        growBuf();
    }
    
    /**
     * Is the next character equal to 'to'
     * @param to character to compare against
     * @return true if the same
     * @throws IOException
     */
    public boolean peek(int to) throws IOException {
        // keep value of twoAgo around so we can restore after we unread
        int captureTwoAgo = twoAgo;
        int c = read();
        unread(c);
        twoAgo = captureTwoAgo;
        return c == to;
    }

    private void advance(int c) {

        twoAgo = oneAgo;
        oneAgo = c;
        offset++;
    }

    private int carriageReturn(int c) throws IOException {
        if ((c = in.read()) != '\n') {
            unread((char) c);
        } else {
            // Position within source must reflect the actual offset and column.  Since
            // we ate an extra character here (this accounting is normally done in read
            // ), we should update position info.
            offset++;
        }
        return c;
    }

    private void growBuf() {
        // If we outgrow our pushback stack then grow it (this should only happen in pretty 
        // pathological cases).
        if (bufLength + 1 == buf.length) {
            char[] newBuf = new char[buf.length + INITIAL_PUSHBACK_SIZE];

            System.arraycopy(buf, 0, newBuf, 0, buf.length);

            buf = newBuf;
        }
    }

    private void retreat() {

        offset--;
        oneAgo = twoAgo;
        twoAgo = 0;
    }
    
    /**
     * Convenience method to hide exception.  If we do hit an exception
     * we will pretend we EOF'd.
     * 
     * @return the current char or EOF (at EOF or on error)
     */
    private int wrappedRead() throws IOException {
        int c = in.read();
        
        // If \r\n then just pass along \n (windows). 
        if (c == '\r') { 
            c = carriageReturn(c);
        }

        captureFeature(c);

        return c;
    }

    public ByteList readLineBytes() throws IOException {
        ByteList bytelist = new ByteList(80);

        for (int c = read(); c != '\n' && c != RipperLexer.EOF; c = read()) {
            bytelist.append(c);
        }

        return bytelist;
    }
    
    public ByteList readLineBytesPlusNewline() throws IOException {
        ByteList bytelist = new ByteList(80);

        int c = read();
        for (; c != '\n' && c != RipperLexer.EOF; c = read()) {
            bytelist.append(c);
        }
        if (c != RipperLexer.EOF) bytelist.append(c);

        return bytelist;        
    }
    
    public int skipUntil(int marker) throws IOException {
        int c;
        for (c = read(); c != marker && c != RipperLexer.EOF; c = read()) {}
        return c;
    }

    public void unreadMany(CharSequence buffer) {
        int length = buffer.length();
        for (int i = length - 1; i >= 0; i--) {
            unread(buffer.charAt(i));
        }
    }

    /**
     * Match marker against input consumering lexer source as it goes...Unless it does not match
     * then it reverts lexer source back to point when this method was invoked.
     * 
     * @param marker to match against
     * @param indent eat any leading whitespace
     * @param withNewline includes a check that marker is followed by newline or EOF
     * @return 0 if no match -1 is EOF and '\n' if newline (only if withNewline is true).
     * @throws IOException if an error occurred reading from underlying IO source
     */
    public int matchMarker(ByteList match, boolean indent, boolean checkNewline) throws IOException {
        int length = match.length();
        ByteList buffer = new ByteList(length + 1);
        
        if (indent) {
            indentLoop(buffer);
        }
        
        if (!matches(match, buffer, length)) return 0;
        
        return finishMarker(checkNewline, buffer); 
    }

    private void indentLoop(ByteList buffer) throws IOException {
        int c;
        while ((c = read()) != RipperLexer.EOF) {
            if (!Character.isWhitespace(c) || c == '\n') {
                unread(c);
                break;
            }
            buffer.append(c);
        }
    }
    
    private boolean matches(ByteList match, ByteList buffer, int length) throws IOException {
        int c;
        for (int i = 0; i < length; i++) {
            c = read();
            buffer.append(c);
            if (match.charAt(i) != c) {
                unreadMany(buffer);
                return false;
            }
        }
        return true;
    }

    private int finishMarker(boolean checkNewline, ByteList buffer) throws IOException {

        if (!checkNewline) return -1;

        int c = read();

        if (c == RipperLexer.EOF) return -1;
        if (c == '\n') return '\n';

        buffer.append(c);
        unreadMany(buffer);

        return 0;
    }
    
    /**
     * Was the last character read from the stream the first character on a line
     * 
     * @return true if so
     */
    public boolean wasBeginOfLine() {
        return twoAgo == '\n';
    }

    public boolean lastWasBeginOfLine() {
        return oneAgo == '\n';
    }
    
    static final ByteList EOF_LABEL = new ByteList(new byte[] {'{', 'e', 'o', 'f', '}'});

    @Override
    public String toString() {
        try {
            ByteList buffer = new ByteList(20);
            ByteList unreadBuffer = new ByteList(20);

            if (twoAgo != -1 && twoAgo != 0) buffer.append(twoAgo);
            if (oneAgo != -1 && oneAgo != 0) buffer.append(oneAgo);

            buffer.append('<');

            int c = read();
            unreadBuffer.append(c);
            
            if (c == -1) {
                unread(unreadBuffer.charAt(0));
                buffer.append(EOF_LABEL);
                buffer.append('>');
                
                return buffer.toString();
            } else {
                buffer.append(c).append('>');
            }
            int i = 1;
            
            for (; i < 20; i++) {
                c = read();
                unreadBuffer.append(c);
                if (c == -1) {
                    buffer.append(EOF_LABEL);
                    i--;
                    break;
                }
                buffer.append(c);
            }
            for (; i >= 0; i--) {
                unread(unreadBuffer.charAt(i));
            }
            buffer.append(new byte[] {' ', '.', '.', '.'});
            return buffer.toString();
        } catch(Exception e) {
            return null;
        }
    }

    public ByteList readUntil(char marker) throws IOException {
        ByteList buffer = new ByteList(20);
        int c;
        
        for (c = read(); c != marker && c != RipperLexer.EOF; c = read()) {
            buffer.append(c);
        }
        
        if (c == RipperLexer.EOF) return null;
        
        unread(c);
        
        return buffer;
    }

    public InputStream getRemainingAsStream() throws IOException {
        return bufferEntireStream(in);
    }

    private InputStream bufferEntireStream(InputStream stream) throws IOException {
        byte[] allBytes = new byte[0];
        byte[] b = new byte[1024];
        int bytesRead;
        while ((bytesRead = stream.read(b)) != -1) {
            byte[] newbuf = new byte[allBytes.length + bytesRead];
            System.arraycopy(allBytes, 0, newbuf, 0, allBytes.length);
            System.arraycopy(b, 0, newbuf, allBytes.length, bytesRead);
            allBytes = newbuf;
        }

        return new ByteArrayInputStream(allBytes);
    }    
}