// net.sourceforge.pmd.cpd.PLSQLTokenizer (Maven / Gradle / Ivy)
//
// Go to download
package net.sourceforge.pmd.cpd;

import java.io.StringReader;
import java.util.Properties;

import net.sourceforge.pmd.lang.plsql.ast.Token;
import net.sourceforge.pmd.lang.plsql.ast.Node;
import net.sourceforge.pmd.lang.plsql.ast.SimpleCharStream;
import net.sourceforge.pmd.lang.plsql.ast.SimpleNode;
import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserConstants;
import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserTokenManager;

/**
 * CPD tokenizer for PL/SQL source.
 *
 * <p>The source text is fed through {@link PLSQLParserTokenManager} and one
 * {@link TokenEntry} is added per lexical token. Depending on the three
 * "ignore" options, the image of comment, identifier or literal tokens is
 * replaced by the numeric token kind, so that all tokens of that kind are
 * recorded with the same image regardless of their text.</p>
 *
 * <p>Option precedence: a value supplied explicitly (through
 * {@link #setProperties(Properties)} or one of the setters) always wins. Any
 * option that was not supplied explicitly before the first call to
 * {@link #tokenize(SourceCode, Tokens)} is read from the System properties at
 * that point.</p>
 */
public class PLSQLTokenizer implements Tokenizer {

    /** Property name controlling {@link #setIgnoreComments(boolean)}; defaults to {@code "true"}. */
    public static final String IGNORE_COMMENTS = "ignore_comments";
    /** Property name controlling {@link #setIgnoreIdentifiers(boolean)}; defaults to {@code "false"}. */
    public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
    /** Property name controlling {@link #setIgnoreLiterals(boolean)}; defaults to {@code "false"}. */
    public static final String IGNORE_LITERALS = "ignore_literals";

    /** Set once the first {@code tokenize} call has resolved the effective options. */
    private boolean isInitialised = false;
    private boolean ignoreComments;
    private boolean ignoreIdentifiers;
    private boolean ignoreLiterals;

    /*
     * Track which options the caller supplied explicitly, so that the lazy
     * System-property lookup in tokenize() does not overwrite them.
     */
    private boolean ignoreCommentsExplicit;
    private boolean ignoreIdentifiersExplicit;
    private boolean ignoreLiteralsExplicit;

    /**
     * Configures all three options from the given properties.
     *
     * <p>The Tokenizer is derived from PLDoc, in which comments are very
     * important. When looking for duplication we are probably not interested
     * in comment variation, so {@link #IGNORE_COMMENTS} defaults to
     * {@code true}; the other two options default to {@code false}.</p>
     *
     * <p>A key that is present in {@code properties} counts as an explicit
     * choice and is not replaced by the System property later. A key that is
     * absent is set to its default here and, if {@code tokenize} has not run
     * yet, is still resolved from the System properties on its first call.</p>
     *
     * @param properties source of the option values; must not be {@code null}
     */
    public void setProperties(Properties properties) {
        ignoreComments = readFlag(properties, IGNORE_COMMENTS, "true");
        ignoreIdentifiers = readFlag(properties, IGNORE_IDENTIFIERS, "false");
        ignoreLiterals = readFlag(properties, IGNORE_LITERALS, "false");

        ignoreCommentsExplicit = properties.getProperty(IGNORE_COMMENTS) != null;
        ignoreIdentifiersExplicit = properties.getProperty(IGNORE_IDENTIFIERS) != null;
        ignoreLiteralsExplicit = properties.getProperty(IGNORE_LITERALS) != null;
    }

    /**
     * @param ignore {@code true} to record comment tokens by kind instead of by text
     */
    public void setIgnoreComments(boolean ignore) {
        this.ignoreComments = ignore;
        this.ignoreCommentsExplicit = true;
    }

    /**
     * @param ignore {@code true} to record literal tokens by kind instead of by text
     */
    public void setIgnoreLiterals(boolean ignore) {
        this.ignoreLiterals = ignore;
        this.ignoreLiteralsExplicit = true;
    }

    /**
     * @param ignore {@code true} to record identifier tokens by kind instead of by text
     */
    public void setIgnoreIdentifiers(boolean ignore) {
        this.ignoreIdentifiers = ignore;
        this.ignoreIdentifiersExplicit = true;
    }

    /**
     * Reads the source text and appends its PLSQL tokens, in order, to
     * {@code tokenEntries}, followed by the EOF entry.
     *
     * @param sourceCode PLSQL source in file, string or database (any suitable
     *        object that can return its code buffer and file name).
     * @param tokenEntries receives one {@link TokenEntry} per token produced by
     *        the PLSQL token manager, then {@link TokenEntry#getEOF()}.
     */
    public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
        // Initialisation has to go here because the System properties are not
        // set up when the Tokenizer is constructed.
        if (!isInitialised) {
            applySystemPropertyDefaults();
            isInitialised = true;

            System.err.println("PLSQLTokenizer: ignoreComments==" + ignoreComments);
            System.err.println("PLSQLTokenizer: ignoreIdentifiers==" + ignoreIdentifiers);
            System.err.println("PLSQLTokenizer: ignoreLiterals==" + ignoreLiterals);
        }

        String fileName = sourceCode.getFileName();
        StringBuilder sb = sourceCode.getCodeBuffer();

        PLSQLParserTokenManager tokenMgr = new PLSQLParserTokenManager(
                new SimpleCharStream(new StringReader(sb.toString())));

        // The loop stops at the first token whose image is empty.
        Token currentToken = tokenMgr.getNextToken();
        while (currentToken.image.length() > 0) {
            String image = currentToken.image;

            // An "ignored" token is still recorded, but under its kind number
            // rather than its text.
            if (isMasked(currentToken.kind)) {
                image = String.valueOf(currentToken.kind);
            }

            tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
            currentToken = tokenMgr.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
    }

    /** Fills every option that was not set explicitly from the System properties. */
    private void applySystemPropertyDefaults() {
        Properties systemProperties = System.getProperties();
        if (!ignoreCommentsExplicit) {
            ignoreComments = readFlag(systemProperties, IGNORE_COMMENTS, "true");
        }
        if (!ignoreIdentifiersExplicit) {
            ignoreIdentifiers = readFlag(systemProperties, IGNORE_IDENTIFIERS, "false");
        }
        if (!ignoreLiteralsExplicit) {
            ignoreLiterals = readFlag(systemProperties, IGNORE_LITERALS, "false");
        }
    }

    /** Parses the boolean property {@code key}, using {@code defaultValue} when it is absent. */
    private static boolean readFlag(Properties properties, String key, String defaultValue) {
        return Boolean.parseBoolean(properties.getProperty(key, defaultValue));
    }

    /** Tells whether a token of the given kind must be recorded by kind under the current options. */
    private boolean isMasked(int kind) {
        return (ignoreComments && isComment(kind))
                || (ignoreIdentifiers && isIdentifier(kind))
                || (ignoreLiterals && isLiteral(kind));
    }

    /** Tells whether {@code kind} is one of the comment token kinds. */
    private static boolean isComment(int kind) {
        return kind == PLSQLParserConstants.SINGLE_LINE_COMMENT
                || kind == PLSQLParserConstants.MULTI_LINE_COMMENT
                || kind == PLSQLParserConstants.FORMAL_COMMENT
                || kind == PLSQLParserConstants.COMMENT
                || kind == PLSQLParserConstants.IN_MULTI_LINE_COMMENT
                || kind == PLSQLParserConstants.IN_FORMAL_COMMENT;
    }

    /** Tells whether {@code kind} is the identifier token kind. */
    private static boolean isIdentifier(int kind) {
        return kind == PLSQLParserConstants.IDENTIFIER;
    }

    /** Tells whether {@code kind} is one of the literal token kinds. */
    private static boolean isLiteral(int kind) {
        return kind == PLSQLParserConstants.UNSIGNED_NUMERIC_LITERAL
                || kind == PLSQLParserConstants.FLOAT_LITERAL
                || kind == PLSQLParserConstants.INTEGER_LITERAL
                || kind == PLSQLParserConstants.CHARACTER_LITERAL
                || kind == PLSQLParserConstants.STRING_LITERAL
                || kind == PLSQLParserConstants.QUOTED_LITERAL;
    }

}