All downloads are free. The search and download functionality uses the official Maven repository.

net.sourceforge.pmd.lang.python.cpd.PythonCpdLexer Maven / Gradle / Ivy

There is a newer version: 7.8.0
Show newest version
/*
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

package net.sourceforge.pmd.lang.python.cpd;

import java.util.regex.Pattern;

import net.sourceforge.pmd.cpd.impl.JavaccCpdLexer;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccToken;
import net.sourceforge.pmd.lang.ast.impl.javacc.JavaccTokenDocument.TokenDocumentBehavior;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.python.ast.PythonTokenKinds;

/**
 * The Python tokenizer.
 *
 * 

Note: This class has been called PythonTokenizer in PMD 6

. */ public class PythonCpdLexer extends JavaccCpdLexer { private static final Pattern STRING_NL_ESCAPE = Pattern.compile("\\\\\\r?\\n"); private static final TokenDocumentBehavior TOKEN_BEHAVIOR = new TokenDocumentBehavior(PythonTokenKinds.TOKEN_NAMES); @Override protected TokenManager makeLexerImpl(TextDocument doc) { return PythonTokenKinds.newTokenManager(CharStream.create(doc, TOKEN_BEHAVIOR)); } @Override protected String getImage(JavaccToken token) { switch (token.kind) { case PythonTokenKinds.SINGLE_STRING: case PythonTokenKinds.SINGLE_STRING2: case PythonTokenKinds.SINGLE_BSTRING: case PythonTokenKinds.SINGLE_BSTRING2: case PythonTokenKinds.SINGLE_USTRING: case PythonTokenKinds.SINGLE_USTRING2: // linebreak escapes, only for single-quoted strings // todo other escapes? return STRING_NL_ESCAPE.matcher(token.getImage()).replaceAll(""); default: return token.getImage(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy