eu.cqse.check.framework.scanner.ELanguage Maven / Gradle / Ivy
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.cqse.check.framework.scanner;
import java.io.File;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.conqat.engine.core.configuration.EFeatureToggle;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.filesystem.FileSystemUtils;
import org.conqat.lib.commons.js_export.ExportToTypeScript;
import org.conqat.lib.commons.resources.Resource;
import org.conqat.lib.commons.test.IndexValueClass;
/**
* Enumeration class for the languages supported by the scanner framework.
*
* This class is used as DTO during communication with IDE clients via
* {@link com.teamscale.ide.commons.client.IIdeServiceClient}, special care has
* to be taken when changing its signature!
*/
@SuppressWarnings("JavadocReference")
@ExportToTypeScript
@IndexValueClass(containedInBackup = true)
public enum ELanguage {
// Each constant passes its readable name, its case sensitivity and its file
// extensions (a single trailing varargs parameter) to the constructor.
// Declaration order is significant: for file extensions shared by several
// languages (e.g. "h", "inc", "m", "sql"), the lookup methods of this enum
// return the constant that is declared first.
/** Java */
JAVA("Java", true, "java"),
/** C++ */
CPP("C++", true, "cpp", "hpp", "cxx", "hxx", "cc", "hh", "h", "inc", "inl"),
/** C */
C("C", true, "c", "h", "inc"),
/** OpenCL C/C++ */
OPEN_CL("OpenCL C/C++", true, "cl"),
/** Rust */
RUST("Rust", true, "rs"),
/** Visual Basic */
VB("Visual Basic", false, "vb", "frm", "cls", "bas"),
/** COBOL */
COBOL("COBOL", false, "cbl", "cob", "cobol", "cpy", "eco"),
/** C# */
CS("C#", true, "cs"),
/** ABAP */
ABAP("ABAP", false, "abap"),
/**
* ABAP Dictionary. This is a textual representation of objects in the ABAP
* Dictionary (aka DDIC). The format is based on the textual format for
* structure and database table definitions which is available starting with
* SAP ABAP Platform v7.51 or v7.52 respectively (see the SAP documentation).
* This format is again based on the SAP CDS DDL. If available, we export ABAP
* DDIC objects in the textual representation as provided by the SAP system,
* otherwise our Teamscale Connector for the SAP ABAP Platform serializes the
* text in a similar format.
*
* This language is not selectable in analysis profiles because it does not make
* sense to use this language in a non-ABAP project. Technically the reason is
* that the language is not listed in
* IndexBasedAnalysisConstants#SUPPORTED_LANGUAGES. Even if the language is not
* part of the analysis profile, **.abap_ddic files will be parsed/analyzed with
* the correct language.
*
* If we write a check for ABAP_DDIC files, then the check must analyze
* ELanguage.ABAP, too. Otherwise, it won't be selectable in profiles.
*/
ABAP_DDIC("ABAP Dictionary", false, "abap_ddic"),
/** Ada */
ADA("Ada", false, "ada", "ads", "adb"),
/** Natural language text */
TEXT("Plain Text", false, "txt"),
/** XML */
XML("XML", true, "xml", "xsl", "xslt", "architecture", "cqb", "csproj", "config", "prj"),
/** HANA SQLScript */
SQLSCRIPT("HANA SQLScript", false, "sql", "hdbprocedure", "hdbfunction", "hdbscalarfunction", "hdbtablefunction"),
/**
* HANA Views XML. This includes view specifications of the SAP HANA database
* which are stored in XML. Does NOT include HANA code which is not XML (e.g.
* *.hdbview).
*/
HANA_VIEW("HANA View", true, "analyticview", "attributeview", "calculationview"),
/** PL/SQL */
PLSQL("PL/SQL", false, "sql", "pks", "pkb", "trg", "fnc", "typ", "tyb", "prc", "plsql"),
/** Python */
PYTHON("Python", true, "py"),
/** T-SQL aka Transact SQL. */
TSQL("Transact-SQL", false, "tsql", "sql"),
/** Matlab */
MATLAB("Matlab", true, "m"),
/** PHP */
PHP("PHP", true, "php", "php3", "php4", "php5"),
/**
* JavaScript including EcmaScript and TypeScript.
*
* Note that the statement oracle only works if semicolons are used
* consistently. However, semicolons are optional in JavaScript (automatic
* semicolon insertion), and determining the end of a statement in this case
* requires a full-blown parser (hard to decide locally in some cases). As most
* coding guidelines recommend using semicolons anyway, we stick with this
* solution.
*/
JAVASCRIPT("JavaScript/TypeScript", true, "js", "sj", "jsx", "ts", "tsx"),
/**
* Use this for languages for which no dedicated scanner is available. Creates a
* token per line (and creates EOL tokens). Has no file extensions of its own.
*/
LINE("Line-based Text", false),
/** Delphi */
DELPHI("Delphi", false, "pas", "dpr"),
/**
* IEC 61131-3 Structured Text. We understand both the code (PU) and variable
* structure (SV).
*
* - .pou files contain program code (written in ST language)
* - .dt files contain type declarations (written in ST language)
*
* Sadly, the file endings and format are not standardized in IEC-61131-3.
*/
IEC61131("IEC 61131-3 ST", true, "pu", "sv", "st", "scl", "pou", "dt", "var", "tu"),
/** Fortran */
FORTRAN("Fortran", false, "f", "for", "f77", "f90", "f95", "f03"),
/** Xtend */
XTEND("Xtend", true, "xtend"),
/** Swift */
SWIFT("Swift", true, "swift"),
/** OCaml */
OCAML("OCaml", true, "ml", "mli"),
/** Opentext Oscript */
OSCRIPT("OScript", true, "osx", "lxe", "os"),
/** Groovy */
GROOVY("Groovy", true, "groovy"),
/** Natural language requirements */
NL_REQUIREMENTS("Natural Language Requirements", false),
/** Natural language tests */
NL_TESTS("Natural Language Tests", false, "nltests"),
/** Natural language issues */
NL_ISSUES("Natural Language Issues", false),
/** Simulink and Stateflow. */
SIMULINK("Simulink and Stateflow", false, "mdl", "slx", "sldd"),
/** Gosu. */
GOSU("Gosu", true, "gsp", "gs", "gsx", "gr", "grs"),
/** Kotlin. */
KOTLIN("Kotlin", true, "kt", "kts", "ktm"),
/** Objective-C. */
OBJECTIVE_C("Objective-C", true, "m", "h"),
/** Objective C++. */
OBJECTIVE_CPP("Objective-C++", true, "mm", "h", "inc", "inl"),
/** JavaDoc */
JAVADOC("JavaDoc", true),
/** AbapDoc */
ABAPDOC("Abap Doc", true),
/** Go */
GO("Go", true, "go"),
/** Files in the "Core Data Services Definition Language" from SAP. */
ABAP_CDS("ABAP CDS", false, /* Teamscale file ending for CDS */ "abap_cds",
/* Teamscale file ending for CDS meta data */ "abap_ddlx", /* AbapGit file ending for CDS */ "asddls",
/* AbapGit file ending for CDS meta data */ "asddlxs",
/* AbapGit file ending for CDS and CDS meta data */ "acds"),
/** PowerShell */
POWERSHELL("Powershell", false, "ps1", "psd1", "psm1"),
/** Extended SQL (ESQL) */
ESQL("Extended SQL (ESQL)", false, "esql"),
/**
* Used for Kubernetes resources, such as static manifests, Helm charts and
* Kustomizations.
*/
KUBERNETES("Kubernetes", false, "yaml", "json", "yml");
/** List of languages that do not have methods. */
private static final EnumSet LANGUAGES_WITHOUT_METHODS = EnumSet.of(ABAP_DDIC, LINE, TEXT, XML,
HANA_VIEW);
/**
* List of languages that support preprocessor expansions (macros). Used by the
* frontend code.
*/
@SuppressWarnings("unused")
private static final EnumSet LANGUAGES_WITH_PREPROCESSOR_EXPANSIONS = EnumSet.of(C, CPP, OBJECTIVE_C,
OBJECTIVE_CPP);
/**
* List of languages that should be hidden from the UI. Languages should be
* hidden if the corresponding features (parsers, checks...) are either
* experimental or outdated but needed for legacy support or compatability
* reasons. Showing hidden languages in the UI can be activated by enabling the
* feature toggle {@link EFeatureToggle#SHOW_HIDDEN_LANGUAGES}. See also
* TS-31076.
*/
public static final Set HIDDEN_LANGUAGES = EnumSet.of(RUST, KUBERNETES);
/**
* This maps from extensions to languages that typically have files with these
* extensions. The values of this map are {@link EnumSet}s, not {@link Set}s
* since {@link EnumSet} has a stable traversal order when called via
* {@link Set#stream()}. That is important for keeping our analysis results
* stable.
*/
private static final Map> EXTENSION_2_LANGUAGE_MAP = new HashMap<>();
// Initialize {@link #EXTENSION_2_LANGUAGE_MAP}: register every language under
// each of its extensions. Languages are visited in declaration order.
static {
    for (ELanguage language : values()) {
        for (String extension : language.extensions) {
            // Locale.ROOT keeps the keys independent of the JVM's default
            // locale (e.g. Turkish case mapping of 'I').
            EnumSet<ELanguage> languagesForExtension = EXTENSION_2_LANGUAGE_MAP
                    .computeIfAbsent(extension.toLowerCase(Locale.ROOT), ext -> EnumSet.noneOf(ELanguage.class));
            languagesForExtension.add(language);
        }
    }
}
/** The readable name of the language, to be used, e.g., in a UI. */
private final String readableName;
/** Whether the language is case sensitive. */
private final boolean caseSensitive;
/**
* File extensions (without leading dot) commonly used for this language. Empty
* for constants that are declared without extensions (e.g. {@link #LINE}).
*/
private final String[] extensions;
/**
 * Creates a language constant.
 *
 * @param readableName
 *            name shown to users, e.g. in a UI
 * @param caseSensitive
 *            whether the language is case sensitive
 * @param extensions
 *            file extensions commonly used for the language (may be empty)
 */
ELanguage(String readableName, boolean caseSensitive, String... extensions) {
    this.extensions = extensions;
    this.caseSensitive = caseSensitive;
    this.readableName = readableName;
}
/** @return the human-readable name of this language, see {@link #readableName}. */
public String getReadableName() {
    return this.readableName;
}
/** @return {@code true} if this language is case-sensitive, see {@link #caseSensitive}. */
public boolean isCaseSensitive() {
    return this.caseSensitive;
}
/**
 * Returns the file extensions commonly used for this language. The internal
 * array is passed through {@link CollectionUtils#copyArray} before it is
 * handed out.
 */
public String[] getFileExtensions() {
    String[] extensionsCopy = CollectionUtils.copyArray(this.extensions);
    return extensionsCopy;
}
/**
 * Gets the {@link ELanguage} value corresponding to the extension of the
 * resource. Never returns null: if no language is registered for the
 * extension (this includes a null extension), {@link #LINE} is returned. If
 * there are multiple possible languages, the one declared first in this enum
 * is returned (stable ordering).
 */
public static ELanguage fromResource(Resource resource) {
    EnumSet<ELanguage> candidates = getAllLanguagesForExtension(resource.getExtension());
    if (candidates.isEmpty()) {
        return LINE;
    }
    // EnumSet iterates in declaration order, so this is the "first" language.
    return candidates.iterator().next();
}
/**
 * Gets the {@link ELanguage} value corresponding to the file extension of the
 * path. Never returns null: this delegates to {@link #fromFile(File)}, which
 * returns {@link #LINE} if no language matches the extension. If there are
 * multiple possible languages, the first one is returned. This method should
 * only be used for test code or as a fallback since file extensions may match
 * multiple {@link ELanguage}s.
 *
 * @deprecated This method should not be used in production because it is
 *             usually not possible to determine the {@link ELanguage} of a file
 *             based on its uniform path. We allow fine-grained configuration of
 *             the language mapping (project-connector option) and spend a lot
 *             of effort on finding the right language for a file based on file
 *             name, languages configured in the analysis profile, and file
 *             content. Also, there are some languages which by default share
 *             file extensions (e.g., CPP and Objective-C, or Objective-C and
 *             Matlab). To determine the {@link ELanguage} of a file, use its
 *             {@link TokenElementInfo}, that is the only "correct" source.
 */
@Deprecated
public static ELanguage fromPath(String path) {
    File file = new File(path);
    return fromFile(file);
}
/**
 * Gets the {@link ELanguage} value corresponding to the file extension of the
 * file. Returns {@link ELanguage#LINE} if no language is registered for the
 * extension. If there are multiple possible languages, the first one is
 * returned. This method should only be used for test code or as a fallback
 * since file extensions may match multiple {@link ELanguage}s.
 *
 * If there are multiple potential languages, this method will always choose the
 * same language as "first" one (stable result): the one declared first in this
 * enum.
 *
 * @deprecated See {@link #fromPath(String)}
 */
@Deprecated
public static ELanguage fromFile(File file) {
    EnumSet<ELanguage> candidates = getAllLanguagesForExtension(FileSystemUtils.getFileExtension(file));
    // No explicit sort needed: an EnumSet already streams its elements in
    // their natural (declaration) order.
    return candidates.stream().findFirst().orElse(LINE);
}
/**
 * Returns all languages for the given file extension. The lookup ignores the
 * case of the extension. This returns the empty EnumSet if the extension is
 * null or no language is registered for it. The returned set is a fresh copy
 * and may be modified by the caller.
 *
 * This method returns {@link EnumSet} instead of {@link Set} since
 * {@link EnumSet} has a stable order. This is important if we (for example)
 * select the first language and want to achieve stable, repeatable analysis
 * results.
 */
public static EnumSet<ELanguage> getAllLanguagesForExtension(String extension) {
    if (extension == null) {
        return EnumSet.noneOf(ELanguage.class);
    }
    // Locale.ROOT: with the default locale, e.g. "INC" would become "ınc"
    // under a Turkish locale and the lookup would miss.
    EnumSet<ELanguage> languages = EXTENSION_2_LANGUAGE_MAP.get(extension.toLowerCase(Locale.ROOT));
    if (languages == null) {
        return EnumSet.noneOf(ELanguage.class);
    }
    // Copy so that callers cannot modify the sets stored in the map.
    return EnumSet.copyOf(languages);
}
/**
 * Returns all {@link ELanguage}s matching the file extension of the given file
 * path. Delegates to {@link #getAllLanguagesForExtension(String)}, so the
 * result is empty if no language is registered for the extension.
 */
public static EnumSet<ELanguage> getAllLanguagesForPath(String path) {
    String extension = FileSystemUtils.getFileExtension(path);
    return getAllLanguagesForExtension(extension);
}
/**
 * Tells whether the given language has the concept of methods, i.e. whether it
 * is not listed in {@link #LANGUAGES_WITHOUT_METHODS}.
 */
public static boolean languageHasMethods(ELanguage language) {
    boolean lacksMethods = LANGUAGES_WITHOUT_METHODS.contains(language);
    return !lacksMethods;
}
/**
 * Checks whether this language belongs to
 * {@code LanguageGroups.OBJECTIVE_C_AND_OBJECTIVE_CPP}, i.e. is
 * {@link #OBJECTIVE_C} or {@link #OBJECTIVE_CPP}.
 */
public boolean isObjectiveCOrObjectiveCpp() {
    boolean inGroup = LanguageGroups.OBJECTIVE_C_AND_OBJECTIVE_CPP.contains(this);
    return inGroup;
}
/**
 * Checks whether this language belongs to {@code LanguageGroups.C_AND_CPP},
 * i.e. is {@link #C} or {@link #CPP}.
 */
public boolean isCppOrC() {
    boolean inGroup = LanguageGroups.C_AND_CPP.contains(this);
    return inGroup;
}
/**
 * Checks whether this language belongs to
 * {@code LanguageGroups.CPP_AND_OBJECTIVE_CPP}, i.e. is {@link #CPP} or
 * {@link #OBJECTIVE_CPP}.
 */
public boolean isCppOrObjectiveCpp() {
    boolean inGroup = LanguageGroups.CPP_AND_OBJECTIVE_CPP.contains(this);
    return inGroup;
}
/**
 * Returns the languages that should be shown in the UI. With the feature
 * toggle {@link EFeatureToggle#SHOW_HIDDEN_LANGUAGES} enabled, every language
 * is returned; otherwise the languages contained in {@link #HIDDEN_LANGUAGES}
 * are left out of the returned array.
 */
public static ELanguage[] getUserVisibleLanguages() {
    if (EFeatureToggle.SHOW_HIDDEN_LANGUAGES.isEnabled()) {
        return values();
    }
    return Arrays.stream(values()).filter(candidate -> !HIDDEN_LANGUAGES.contains(candidate))
            .toArray(ELanguage[]::new);
}
}