All downloads are free. The search and download features use the official Maven repository.

org.netbeans.modules.languages.parser.Pattern Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.netbeans.modules.languages.parser;

import java.util.Map;
import org.netbeans.api.languages.ParseException;
import org.netbeans.api.languages.CharInput;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.netbeans.modules.languages.TokenType;
import org.netbeans.modules.languages.parser.StringInput;

public class Pattern {
    
    /**
     * Edge label with character code 0. It is passed as the "star" argument to
     * every {@code DGUtils} operation, and {@code next()} / {@code read()} retry a
     * transition with this label when no edge exists for the actual input
     * character, so it acts as the "any other character" edge. It is also the
     * label produced for the {@code '.'} pattern element.
     */
    private static final Character STAR = new Character ((char) 0);
    /**
     * Shared supplier of node identifiers for every graph built by this class.
     * Identifiers are consecutive integers starting at 1; the counter is a plain
     * unsynchronized field.
     */
    private static NodeFactory nodeFactory = new NodeFactory () {
        private int nextId = 1;
        public Integer createNode () {
            Integer id = Integer.valueOf (nextId);
            nextId++;
            return id;
        }
    };
    
    /**
     * Creates a pattern backed by a fresh single-node graph.
     *
     * @return a new pattern with no edges
     */
    public static Pattern create () {
        Pattern empty = new Pattern ();
        return empty;
    }
    
    /**
     * Parses a pattern expression given as text.
     *
     * @param input pattern expression; must not be empty
     * @return the parsed and reduced pattern
     * @throws ParseException if {@code input} is empty or cannot be parsed
     */
    public static Pattern create (String input) throws ParseException {
        if (input.isEmpty ()) {
            throw new ParseException ("Empty pattern.");
        }
        CharInput chars = new StringInput (input);
        return create (chars);
    }
    
    /**
     * Parses a pattern expression from {@code input} and passes the resulting
     * graph through {@code DGUtils.reduce} before wrapping it.
     *
     * @param input source of the pattern expression
     * @return the parsed pattern backed by the reduced graph
     * @throws ParseException if the expression cannot be parsed
     */
    public static Pattern create (CharInput input) throws ParseException {
        DG parsed = createIn (input).dg;
        return new Pattern (DGUtils.reduce (parsed, nodeFactory));
    }
    
    /**
     * Builds a pattern that matches the given text regardless of letter case.
     * For each character whose upper- and lower-case forms differ, either form
     * is accepted; every other character is matched literally.
     *
     * @param input literal text to match
     * @return pattern matching {@code input} case-insensitively
     * @throws ParseException declared for symmetry with the other factories
     */
    private static Pattern createCaseInsensitive (StringBuffer input) throws ParseException {
        Pattern result = new Pattern ();
        final int count = input.length ();
        int index = 0;
        while (index < count) {
            char original = input.charAt (index);
            char upper = Character.toUpperCase (original);
            char lower = Character.toLowerCase (original);
            Pattern step;
            if (upper == lower) {
                step = new Pattern (new Character (original));
            } else {
                // Build the upper-case branch first, then the lower-case one, then merge.
                Pattern upperBranch = new Pattern (new Character (upper));
                Pattern lowerBranch = new Pattern (new Character (lower));
                step = upperBranch.merge (lowerBranch);
            }
            result = result.append (step);
            index++;
        }
        return result;
    }
    
    /**
     * Recursive parser that turns a pattern expression read from {@code input}
     * into a {@code Pattern}.
     * <p>
     * Two patterns are tracked: {@code pattern} is the sequence completed so far,
     * and {@code last} is the most recently parsed element, kept apart so that a
     * following postfix operator ({@code *}, {@code ?}, {@code +}) can still be
     * applied to it before it is appended to {@code pattern}.
     * <p>
     * Throughout, {@code input.next ()} is used to look at the current character
     * and {@code input.read ()} to consume it. Character 0 is treated as the end
     * of the expression.
     * <p>
     * Parsing ends at character 0, at an unconsumed {@code ')'} (left for the
     * invocation that consumed the matching {@code '('}), or after a
     * {@code '|'}, whose right-hand side is parsed by a recursive call.
     *
     * @param input source of the pattern expression
     * @return the pattern built from the consumed part of {@code input}
     * @throws ParseException when an unexpected character is encountered
     */
    private static Pattern createIn (CharInput input) throws ParseException {
        // Sequence assembled so far.
        Pattern pattern = new Pattern ();
        // Most recent element; null when there is nothing a postfix operator could apply to.
        Pattern last = null;
        char ch = input.next ();
        while (ch != 0) {
            switch (ch) {
                // Whitespace between elements is ignored.
                case ' ':
                case '\t':
                case '\n':
                case '\r':
                    input.read ();
                    break;
                // Postfix operators: each requires a preceding element and replaces it.
                case '*':
                    input.read ();
                    if (last == null) throw new ParseException ("Unexpected character '" + ch + "'.");
                    last = last.star ();
                    break;
                case '?':
                    input.read ();
                    if (last == null) throw new ParseException ("Unexpected character '" + ch + "'.");
                    last = last.question ();
                    break;
                case '+':
                    input.read ();
                    if (last == null) throw new ParseException ("Unexpected character '" + ch + "'.");
                    last = last.plus ();
                    break;
                // Group: flush the pending element, parse the group body recursively,
                // then require and consume the closing ')'.
                case '(':
                    input.read ();
                    if (last != null) pattern = pattern.append (last);
                    last = createIn (input);
                    if (input.next () != ')')
                        throw new ParseException ("Unexpected character '" + input.next () + "'.");
                    input.read ();
                    break;
//                case '<':
//                    input.read ();
//                    if (last != null) pattern = pattern.append (last);
//                    last = new Pattern (readToken (input));
//                    if (input.read () != '>')
//                        throw new ParseException ("> expected: " + input);
//                    break;
                // Quoted literal. The text is first collected into buf with escapes
                // resolved, then converted into a chain of single-character patterns.
                case '\'':
                case '"':
                    input.read ();
                    if (last != null) pattern = pattern.append (last);
                    last = Pattern.create ();
                    StringBuffer buf = new StringBuffer();
                    ch = input.next ();
                    // NOTE(review): either quote character ends the literal, regardless
                    // of which one opened it.
                    while (ch != '"' && ch != '\'') {
                        if (ch == 0)
                            throw new ParseException ("Unexpected character '" + ch + "'.");
                        if (ch == '\\') {
                            // Escape sequence: consume the backslash, then dispatch on the next character.
                            input.read ();
                            switch (input.next ()) {
                                case '\\':
                                    input.read ();
                                    buf.append('\\');
                                    break;
                                case 'n':
                                    input.read ();
                                    buf.append('\n');
                                    break;
                                case 'r':
                                    input.read ();
                                    buf.append('\r');
                                    break;
                                case 't':
                                    input.read ();
                                    buf.append('\t');
                                    break;
                                case '"':
                                    input.read ();
                                    buf.append('"');
                                    break;
                                case '\'':
                                    input.read ();
                                    buf.append('\'');
                                    break;
                                case 'u':
                                    // Unicode escape: exactly four hex digits, most significant first.
                                    // i is the place value of the current digit (4096, 256, 16, 1).
                                    input.read ();
                                    int ch1 = 0;
                                    for (int i = 16*16*16; i >= 1; i/=16) {
                                        char c = input.next ();
                                        int ii = 0;
                                        if ('0' <= c && c <= '9') {
                                            ii = c - '0';
                                        } else if ('a' <= c && c <= 'f') {
                                            ii = c - 'a' + 10;
                                        } else if ('A' <= c && c <= 'F') {
                                            ii = c - 'A' + 10;
                                        } else {
                                            throw new ParseException ("Unexpected character after \\u:" + c);
                                        }
                                        ch1 += ii * i;
                                        input.read ();
                                    }
                                    buf.append((char) ch1);
                                    break;
                                default:
                                    throw new ParseException ("Unexpected character after \\:" + input.next ());
                            }
                        } else {
                            buf.append(input.read());
                        }
                        ch = input.next ();
                    }
                    // Consume the closing quote.
                    input.read ();
                    ch = input.next();
                    // An 'i' directly after the closing quote makes the literal case-insensitive.
                    if (ch == 'i') {
                        input.read();
                        last = last.append(createCaseInsensitive(buf));
                    } else {
                        int length = buf.length();
                        Pattern pat = new Pattern();
                        for (int x = 0; x < length; x++) {
                            pat = pat.append(new Pattern(new Character(buf.charAt(x))));
                        }
                        last = last.append(pat);
                    }
                    break;
                // Alternation: everything parsed so far is the left side; the rest of
                // this (sub)expression is parsed recursively as the right side and the
                // merged result is returned immediately.
                case '|':
                    input.read ();
                    if (last != null) pattern = pattern.append (last);
                    last = null;
                    pattern = pattern.merge (Pattern.createIn (input));
                    return pattern;
                // '-' followed by a quoted, non-empty string (no escape handling here).
                // NOTE(review): presumably "match everything up to and including the
                // quoted text" - confirm against DGUtils semantics.
                case '-':
                    if (last != null) pattern = pattern.append (last);
                    input.read ();
                    skipWhitespaces (input);
                    ch = input.next ();
                    // An opening quote is required ...
                    if (ch != '\'' && ch != '"')
                        throw new ParseException ("Unexpected character '" + ch + "'.");
                    input.read ();
                    ch = input.next ();
                    // ... and the quoted text must not be empty.
                    if (ch == '\'' || ch == '"')
                        throw new ParseException ("Unexpected character '" + ch + "'.");
                    // First quoted character: a negated one-character class, starred,
                    // followed by that character itself.
                    Character edge = new Character (input.next ());
                    last = new Pattern (true, Collections.singleton (edge));
                    last = last.star ().append (new Pattern (edge));
                    input.read ();
                    ch = input.next ();
                    // Each further quoted character: apply plus() to what has been built,
                    // then extend the graph by hand with one new node reached from an end
                    // node via that character; the new node becomes the only end node.
                    while (ch != '\'' && ch != '"') {
                        if (ch == 0)
                            throw new ParseException ("Unexpected character '" + ch + "'.");
                        last = last.plus ();
                        Integer endN = last.dg.getEnds ().iterator ().next ();
                        Integer newE = last.nodeFactory.createNode ();
                        last.dg.addNode (newE);
                        last.dg.addEdge (endN, newE, new Character (input.next ()));
                        last.dg.setEnds (Collections.singleton (newE));
                        input.read ();
                        ch = input.next ();
                    }
                    // Consume the closing quote.
                    input.read ();
                    break;
                // End of a group: the ')' itself is NOT consumed here; the '(' branch of
                // the calling invocation checks for it and consumes it.
                case ')':
                    if (last != null) pattern = pattern.append (last);
                    return pattern;
                // Wildcard: a single edge labelled STAR.
                case '.':
                    input.read ();
                    if (last != null) pattern = pattern.append (last);
                    last = new Pattern (Pattern.STAR);
                    break;
                // Character class: quoted single characters and 'a'-'z' style ranges,
                // optionally negated by a leading '^'.
                case '[':
                    input.read ();
                    if (last != null) pattern = pattern.append (last);
                    boolean not = false;
                    ch = input.next ();
                    if (ch == '^') {
                        input.read ();
                        ch = input.next ();
                        not = true;
                    }
                    Set set = new HashSet ();
                    // l: most recently read class character not yet added to set (0 = none).
                    char l = (char) 0;
                    // minus: true after a '-' has been seen and the upper bound is still expected.
                    boolean minus = false;
                    ch = input.next ();
                    // NOTE(review): when ch == 0 (no closing ']') the loop ends without an
                    // error unless a range is pending.
                    while (ch != ']' && ch != 0) {
                        switch (ch) {
                            case ' ':
                            case '\t':
                            case '\n':
                            case '\r':
                                input.read ();
                                break;
                            case '\'':
                            case '"':
                                // ol remembers the previous character: it is the lower bound
                                // if a range is in progress.
                                char ol = l;
                                // Flush the previous stand-alone character.
                                if (l != 0 && !minus) 
                                    set.add (new Character (l));
                                input.read ();
                                ch = input.next ();
                                if (ch == '\\') {
                                    input.read ();
                                    ch = input.next ();
                                    switch (ch) {
                                        case 'n':
                                            l = '\n';
                                            break;
                                        case 't':
                                            l = '\t';
                                            break;
                                        case 'r':
                                            l = '\r';
                                            break;
                                        case '\'':
                                            l = '\'';
                                            break;
                                        case '\\':
                                            l = '\\';
                                            break;
                                        case '"':
                                            l = '"';
                                            break;
                                        case 'u':
                                            // Four hex digits. Each iteration first consumes the
                                            // previous character (the 'u' on the first pass), then
                                            // looks at the digit; the last digit is consumed by the
                                            // read() that follows the switch.
                                            l = 0;
                                            for (int i = 16*16*16; i >= 1; i/=16) {
                                                input.read ();
                                                char c = input.next ();
                                                int ii = 0;
                                                if ('0' <= c && c <= '9') {
                                                    ii = c - '0';
                                                } else if ('a' <= c && c <= 'f') {
                                                    ii = c - 'a' + 10;
                                                } else if ('A' <= c && c <= 'F') {
                                                    ii = c - 'A' + 10;
                                                } else {
                                                    throw new ParseException ("Unexpected character after \\u:" + c);
                                                }
                                                l += ii * i;
                                            }
                                            break;
                                        default:
                                            throw new ParseException ("Unexpected character '" + ch + "'.");
                                    } // switch
                                    // Consume the escape letter (or the last hex digit).
                                    input.read ();
                                } else // if '\\'
                                    l = input.read ();
                                // Exactly one character per quoted item: a closing quote must follow.
                                ch = input.next ();
                                if (ch != '"' && ch != '\'')
                                    throw new ParseException ("Unexpected character '" + ch + "'.");
                                input.read ();
                                // Range complete: add every character from ol to l and clear the pending one.
                                if (minus) {
                                    addInterval (set, ol, l);
                                    l = 0;
                                }
                                minus = false;
                                break; // case '"'
                            case '-':
                                input.read ();
                                // A range needs a pending lower bound.
                                if (l == 0) throw new ParseException ("Unexpected character '-'.");
                                minus = true;
                                break;
//                            case '<':
//                                input.read ();
//                                if (minus) throw new ParseException (input.toString ());
//                                if (l != 0) 
//                                    set.add (new Character (l));
//                                set.add (readToken (input));
//                                if (input.read () != '>')
//                                    throw new ParseException ("> expected: " + input);
//                                break;
                            default:
                                throw new ParseException ("Unexpected character '" + ch + "'.");
                        } // switch
                        ch = input.next ();
                    } // while
                    // A '-' without an upper bound is an error.
                    if (minus) throw new ParseException ("Unexpected character '" + ch + "'.");
                    // Flush the final stand-alone character.
                    if (l != 0) 
                        set.add (new Character (l));
                    // Consume the closing ']'.
                    input.read ();
                    last = new Pattern (not, set);
                    break;
                // Any other character (including unquoted literals) is rejected.
                default:
                    throw new ParseException ("Unexpected character '" + ch + "'.");
//                    input.read ();
//                    if (last != null) pattern = pattern.append (last);
//                    last = new Pattern (new Character (ch));
            } // switch
            ch = input.next ();
        } // while
        // End of input: flush the pending element.
        if (last != null) pattern = pattern.append (last);
        return pattern;
    }

//    private static ASTToken readToken (CharInput input) throws ParseException {
//        StringBuilder sb = new StringBuilder ();
//        char ch = input.next ();
//        while (ch != ',' && ch != '>') {
//            if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
//            sb.append (ch);
//            input.read ();
//            ch = input.next ();
//        }
//        ch = input.next ();
//        String type = sb.toString ().trim ();
//        if (ch == '>') return ASTToken.create (type, null);
//        input.read ();
//        skipWhitespaces (input);
//        sb = new StringBuilder ();
//        ch = input.next ();
//        boolean read = ch != '"' && ch != '\'';
//        if (!read) {
//            input.read ();
//            ch = input.next ();
//        }
//        while (ch != '>' && ch != '"' && ch != '\'' && ch != ',') {
//            if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
//            sb.append (ch);
//            input.read ();
//            ch = input.next ();
//        }
//        if (read && (ch == '"' || ch == '\'')) throw new ParseException ("Unexpected \":" + input.toString ());
//        if (!read) input.read ();
//        String identifier = null;
//        String name = null;
//        if (read) name = sb.toString ();
//        else identifier = sb.toString ();
//        if (!read && ch == ',') {
//            ch = input.next ();
//            sb = new StringBuilder ();
//            while (ch != '>') {
//                if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
//                sb.append (ch);
//                input.read ();
//                ch = input.next ();
//            }
//            name = sb.toString ();
//        }
//        return ASTToken.create (type, identifier);
//    }
    
    /** Characters skipped by {@code skipWhitespaces}: space, line feed, carriage return and tab. */
    private static Set whitespace = new HashSet ();
    static {
        char[] blanks = {' ', '\n', '\r', '\t'};
        for (int i = 0; i < blanks.length; i++) {
            whitespace.add (new Character (blanks [i]));
        }
    }
    
    /**
     * Consumes characters from {@code input} for as long as the current character
     * is contained in {@code whitespace}.
     *
     * @param input input to advance
     */
    private static void skipWhitespaces (CharInput input) {
        for (;;) {
            Character upcoming = new Character (input.next ());
            if (!whitespace.contains (upcoming)) {
                return;
            }
            input.read ();
        }
    }
    
    /**
     * Adds every character in the inclusive range {@code from..to} to {@code set}.
     *
     * @param set  target set; receives boxed {@code Character} values
     * @param from first character of the range
     * @param to   last character of the range (inclusive)
     * @throws ParseException if {@code from} is greater than {@code to}
     */
    private static void addInterval (Set set, char from, char to) 
    throws ParseException {
        if (from > to) throw new ParseException ("Invalid interval (" + from + ">" + to + ").");
        // Count with an int: incrementing a char past '\uffff' wraps to 0, which
        // would keep "c <= to" true forever when to == '\uffff'.
        for (int c = from; c <= to; c++) {
            set.add (Character.valueOf ((char) c));
        }
    }
    
    /**
     * Graph backing this pattern. Nodes are the {@code Integer} identifiers
     * handed out by {@code nodeFactory}; edges are labelled with {@code Character}
     * values, with {@code STAR} used as the fallback label.
     */
    private DG dg;// = DG.createDG ();
    
    /**
     * Wraps an already-built graph without copying it.
     *
     * @param graph graph that becomes this pattern's backing graph
     */
    private Pattern (DG graph) {
        this.dg = graph;
    }
    
    /**
     * Creates a pattern whose graph is built by {@code DG.createDG} from one
     * freshly allocated node.
     */
    private Pattern () {
        Integer start = nodeFactory.createNode ();
        this.dg = DG.createDG (start);
    }

    /**
     * Copy constructor: clones the graph of {@code other} via
     * {@code DGUtils.cloneDG}, passing {@code false} as its flag argument.
     *
     * @param other pattern whose graph is cloned
     */
    private Pattern (Pattern other) {
        DG source = other.dg;
        this.dg = DGUtils.cloneDG (source, false, nodeFactory);
    }
    
    /**
     * Creates a two-node pattern: a start node connected to a single end node by
     * one edge labelled {@code edge}.
     *
     * @param edge label of the only edge
     */
    private Pattern (Character edge) {
        Integer from = nodeFactory.createNode ();
        this.dg = DG.createDG (from);
        Integer to = nodeFactory.createNode ();
        this.dg.addNode (to);
        this.dg.addEdge (from, to, edge);
        this.dg.setEnds (Collections.singleton (to));
    }

    /**
     * Creates a one-character class pattern.
     * <p>
     * Every character in {@code edges} gets an edge from the start node to
     * {@code end}. When {@code not} is {@code false}, {@code end} is the end node,
     * so exactly the listed characters are accepted. When {@code not} is
     * {@code true}, {@code end} is not an end node; instead a {@code STAR} edge
     * leads to a separate end node, so (given that {@code STAR} is only tried when
     * no explicit edge exists) any character outside {@code edges} is accepted.
     *
     * @param not   {@code true} to negate the class
     * @param edges the {@code Character} values making up the class
     */
    private Pattern (boolean not, Set edges) {
        Integer start = nodeFactory.createNode ();
        dg = DG.createDG (start);
        Integer end = nodeFactory.createNode ();
        dg.addNode (end);
        dg.setStart (start);
        dg.setEnds (Collections.emptySet ());
        Iterator it = edges.iterator ();
        while (it.hasNext ()) {
            // Explicit cast: the iterator is untyped here, so next() yields Object.
            Character edge = (Character) it.next ();
            dg.addEdge (start, end, edge);
        }
        if (not) {
            Integer failedState = nodeFactory.createNode ();
            dg.addNode (failedState);
            dg.addEdge (start, failedState, Pattern.STAR);
            dg.addEnd (failedState);
        } else {
            dg.addEnd (end);
        }
    }
    
    /**
     * Returns a copy of this pattern backed by a cloned graph.
     *
     * @return the copy
     */
    public Pattern clonePattern () {
        Pattern copy = new Pattern (this);
        return copy;
    }

    /**
     * Returns the {@code *} form of this pattern: the result of
     * {@code DGUtils.plus} merged with a fresh single-node graph.
     * This pattern itself is not reassigned.
     *
     * @return new pattern for zero-or-more repetitions (presumably; the empty
     *         alternative depends on how {@code DG.createDG} marks end nodes)
     */
    public Pattern star () {
        DG repeated = DGUtils.plus (dg, STAR, nodeFactory);
        DG single = DG.createDG (nodeFactory.createNode ());
        return new Pattern (DGUtils.merge (single, repeated, STAR, nodeFactory));
    }

    /**
     * Returns the {@code +} form of this pattern, built by {@code DGUtils.plus}.
     *
     * @return new pattern wrapping the resulting graph
     */
    public Pattern plus () {
        return new Pattern (DGUtils.plus (dg, STAR, nodeFactory));
    }

    /**
     * Returns the {@code ?} form of this pattern: a clone of the graph whose
     * start node is additionally marked as an end node.
     *
     * @return new pattern wrapping the modified clone
     */
    public Pattern question () {
        DG optional = DGUtils.cloneDG (dg, true, nodeFactory);
        optional.addEnd (optional.getStartNode ());
        return new Pattern (optional);
    }

    /**
     * Combines this pattern with {@code parser} using {@code DGUtils.merge}
     * (used by the pattern parser for the {@code |} operator).
     *
     * @param parser the other alternative
     * @return new pattern wrapping the merged graph
     */
    public Pattern merge (Pattern parser) {
        return new Pattern (DGUtils.merge (dg, parser.dg, STAR, nodeFactory));
    }

    /**
     * Concatenates {@code parser} after this pattern using {@code DGUtils.append}.
     *
     * @param parser pattern to place after this one
     * @return new pattern wrapping the concatenated graph
     */
    public Pattern append (Pattern parser) {
        return new Pattern (DGUtils.append (dg, parser.dg, STAR, nodeFactory));
    }

    /**
     * Tests whether the whole of {@code text} is accepted by this pattern.
     * <p>
     * For each character the explicit edge is tried first; when there is none the
     * {@code STAR} edge is tried, exactly as {@code next} and {@code read} do, so
     * that wildcard and negated-class elements take part in matching.
     *
     * @param text text to test
     * @return {@code true} if consuming all of {@code text} ends in an end node
     */
    public boolean matches (String text) {
        Integer state = dg.getStartNode ();
        for (int i = 0; i < text.length (); i++) {
            Integer target = dg.getNode (state, Character.valueOf (text.charAt (i)));
            if (target == null) {
                // No explicit edge: fall back to the "any other character" edge.
                target = dg.getNode (state, STAR);
            }
            if (target == null) return false;
            state = target;
        }
        return dg.getEnds ().contains (state);
    }

    /**
     * Runs {@link #next(Integer, CharInput)} from this pattern's start node.
     *
     * @param input input to consume from
     * @return the last end node reached, or {@code null} if none was reached
     */
    public Integer next (CharInput input) {
        Integer start = dg.getStartNode ();
        return next (start, input);
    }
    
    /**
     * Follows edges from {@code state} for as long as the input allows and
     * reports the last end node that was passed through (longest match).
     * <p>
     * On return the input index is set to the position recorded at that end
     * node, or to the position it had on entry when no end node was reached.
     *
     * @param state node to start from; {@code null} yields {@code null}
     * @param input input to consume from
     * @return the last end node reached, or {@code null} if none was reached
     */
    public Integer next (Integer state, CharInput input) {
        Integer accepted = null;
        int acceptedIndex = input.getIndex ();
        Integer current = state;
        while (current != null) {
            if (dg.getEnds ().contains (current)) {
                accepted = current;
                acceptedIndex = input.getIndex ();
            }
            if (input.eof ()) {
                break;
            }
            // Prefer the explicit edge; otherwise try the STAR edge.
            Integer target = dg.getNode (current, new Character (input.next ()));
            if (target == null) {
                target = dg.getNode (current, STAR);
            }
            current = target;
            if (current != null) {
                input.read ();
            }
        }
        input.setIndex (acceptedIndex);
        return accepted;
    }

    /**
     * Returns the string form of the backing graph.
     *
     * @return {@code dg.toString ()}
     */
    @Override
    public String toString () {
        String text = dg.toString ();
        return text;
    }
    
//    public Object getValue (Object state, Object key) {
//        return dg.getProperty (state, key);
//    }
    
//    DG getDG () {
//        return dg;
//    }
    
    
    /**
     * Reads the longest token recognisable from the current input position.
     * <p>
     * The graph is walked character by character (explicit edge first, then the
     * {@code STAR} edge). Whenever at least one character has been consumed and
     * the current node carries token types, the best one and the input index are
     * remembered. When no further transition is possible, or the only transition
     * leads to a node with neither edges nor properties, the remembered token is
     * returned and the input index is moved back to where it was recognised.
     * <p>
     * Note that when nothing was recognised the method returns {@code null}
     * WITHOUT restoring the input index; the characters consumed so far stay
     * consumed.
     *
     * @param input input to read from
     * @return the recognised {@code TokenType}, or {@code null} if the input is
     *         at its end or no token could be recognised
     */
    public Object read (CharInput input) {
        if (input.eof ()) return null;
        final int originalIndex = input.getIndex ();
        int lastIndex = -1;
        TokenType lastTT = null;
        Integer node = dg.getStartNode ();
        while (!input.eof ()) {
            Integer nnode = dg.getNode (node, new Character (input.next ()));
            if (nnode == null) {
                nnode = dg.getNode (node, Pattern.STAR);
            }

            // Only a non-empty prefix may count as a token.
            if (input.getIndex () > originalIndex) {
                TokenType bestTT = getBestTT (node);
                if (bestTT != null) {
                    lastTT = bestTT;
                    lastIndex = input.getIndex ();
                }
            }

            boolean deadEnd = nnode == null ||
                ( dg.getEdges (nnode).isEmpty () &&
                  dg.getProperties (nnode).isEmpty ()
                );
            if (deadEnd) {
                if (lastTT != null) {
                    // Rewind to the end of the last recognised token.
                    input.setIndex (lastIndex);
                }
                return lastTT;
            }

            input.read ();
            node = nnode;
        }

        // Input exhausted: the node reached last wins if it carries a token type,
        // otherwise fall back to the last one seen on the way.
        TokenType finalTT = getBestTT (node);
        return finalTT != null ? finalTT : lastTT;
    }
    
    /**
     * Picks the token type with the lowest {@code getPriority ()} value among the
     * properties stored on {@code node}. On equal priority the one encountered
     * first during iteration is kept.
     *
     * @param node node whose properties are inspected
     * @return the selected token type, or {@code null} if the node has none
     */
    private TokenType getBestTT (Integer node) {
        Map candidates = dg.getProperties (node);
        TokenType winner = null;
        for (Object rawKey : candidates.keySet ()) {
            Integer key = (Integer) rawKey;
            TokenType candidate = (TokenType) candidates.get (key);
            boolean better = winner == null
                || winner.getPriority () > candidate.getPriority ();
            if (better) {
                winner = candidate;
            }
        }
        return winner;
    }
    
    /**
     * Attaches token type {@code r} to every end node of this pattern, stored as
     * a node property under the key {@code priority}.
     *
     * @param priority property key under which {@code r} is stored
     * @param r        token type to attach
     */
    void mark (int priority, TokenType r) {
        Iterator it = dg.getEnds ().iterator ();
        while (it.hasNext ()) {
            // Explicit cast: the iterator is untyped here, so next() yields Object.
            Integer s = (Integer) it.next ();
            dg.setProperty (
                s, 
                priority, 
                r
            );
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy