All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.neo4j.cypher.internal.parser.javacc.CypherCharStream Maven / Gradle / Ivy

There is a newer version: 5.23.0
Show newest version
/*
 * Copyright (c) "Neo4j"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see .
 */
package org.neo4j.cypher.internal.parser.javacc;

import java.io.IOException;

/**
 * CharStream operating over an input String.
 * 

* This class unescapes escaped unicode characters, and to do that efficiently * it keeps an internal incremental copy of the input *

*

 * Example
 *      query: "WITH 1 AS x
 *              RETURN '\\u01FF' AS y"
 *     result: [W, I, T, H,  , 1,  , A, S,  , x,\n, R, E, T, U, R, N,  , ', ǿ, ',  , A, S,  , y]
 *      lines: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
 *    columns: [1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12, 1, 2, 3, 4, 5, 6, 7, 8, 9,16,17,18,19,20,21]
 *     offset: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,27,28,29,30,31,32]
 *                                                                          ^
 *                                                                          un-escaped unicode
 * 
*

* As parsing progresses, the {@link CypherCharStream} will convert more and more * of `query` into `result`, while updating `lines`, `columns` and `offset`. */ public class CypherCharStream implements CharStream { private static final char BACKSLASH = '\\'; private static final IOException END_OF_INPUT = new IOException( "End of input" ); private final String query; private int queryCursor = -1; private int queryCursorColumn; private int queryCursorLine = 1; private boolean queryCursorIsCR; private boolean queryCursorIsLF; private char[] result; private int resultCursor = -1; private int resultHighMark; private final int[] lines; private final int[] columns; private final int[] offsets; private int beginOffset; private int tabSize = 1; public CypherCharStream( String query ) { this.query = query; this.result = new char[query.length()]; this.lines = new int[query.length()]; this.columns = new int[query.length()]; this.offsets = new int[query.length()]; } @Override public char readChar() throws IOException { if ( resultCursor + 1 == resultHighMark ) { convertChar(); } resultCursor++; return result[resultCursor]; } private void convertChar() throws IOException { char c = nextQueryChar(); if ( c == BACKSLASH ) { char c2 = nextQueryChar(); if ( c2 == 'u' ) { c = convertUnicode( c2 ); } else { appendToResult( c ); c = c2; } } appendToResult( c ); } private void appendToResult( char c ) { result[resultHighMark] = c; lines[resultHighMark] = queryCursorLine; columns[resultHighMark] = queryCursorColumn; offsets[resultHighMark] = queryCursor; resultHighMark++; } private char nextQueryChar() throws IOException { if ( queryCursor + 1 >= query.length() ) { throw END_OF_INPUT; } queryCursor++; char c = query.charAt( queryCursor ); updateLineColumn( c ); return c; } private void updateLineColumn( char c ) { queryCursorColumn++; if ( queryCursorIsLF ) { queryCursorIsLF = false; queryCursorColumn = 1; queryCursorLine++; } else if ( queryCursorIsCR ) { queryCursorIsCR = false; if ( c == '\n' ) { queryCursorIsLF = true; } else { queryCursorColumn = 1; queryCursorLine++; } } switch ( c ) { case '\r': queryCursorIsCR = true; break; case '\n': queryCursorIsLF = true; break; case '\t': queryCursorColumn--; queryCursorColumn += tabSize - (queryCursorColumn % tabSize); break; default: break; } } private char convertUnicode( char c ) { try { while ( c == 'u' ) { c = nextQueryChar(); } return (char) (hexval( c ) << 12 | hexval( nextQueryChar() ) << 8 | hexval( nextQueryChar() ) << 4 | hexval( nextQueryChar() )); } catch ( final IOException e ) { throw new InvalidUnicodeLiteral( e.getMessage(), queryCursor, queryCursorLine, queryCursorColumn ); } } @Override public void backup( int amount ) { resultCursor -= amount; } @Override public int getBeginColumn() { return columns[beginOffset]; } @Override public int getBeginLine() { return lines[beginOffset]; } public int getBeginOffset() { return offsets[beginOffset]; } @Override public int getEndColumn() { return columns[resultCursor]; } @Override public int getEndLine() { return lines[resultCursor]; } public int getEndOffset() { return offsets[resultCursor]; } @Override public char beginToken() throws IOException { var c = readChar(); beginOffset = resultCursor; return c; } @Override public String getImage() { return new String( result, beginOffset, nextOffset() - beginOffset ); } private int nextOffset() { return resultCursor + 1; } @Override public char[] getSuffix( int len ) { char[] suffix = new char[len]; int endOffset = nextOffset(); System.arraycopy( result, endOffset - len, suffix, 0, len ); return suffix; } @Override public void done() { } @Override public void setTabSize( int i ) { throw new UnsupportedOperationException( "not implemented" ); } @Override public int getTabSize() { throw new UnsupportedOperationException( "not implemented" ); } @Override public void setTrackLineColumn( boolean trackLineColumn ) { throw new UnsupportedOperationException( "not implemented" ); } @Override public boolean isTrackLineColumn() { return true; } static int hexval( final char c ) throws IOException { switch ( c ) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: throw new IOException( "Invalid input '" + c + "': expected four hexadecimal digits specifying a unicode character" ); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy