eu.cqse.check.framework.scanner.ELanguage Maven / Gradle / Ivy
Show all versions of teamscale-commons Show documentation
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.cqse.check.framework.scanner;
import java.io.File;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.conqat.engine.core.configuration.EFeatureToggle;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.filesystem.FileSystemUtils;
import org.conqat.lib.commons.js_export.ExportToTypeScript;
import org.conqat.lib.commons.resources.Resource;
import org.conqat.lib.commons.test.IndexValueClass;
/**
* Enumeration class for the languages supported by the scanner framework.
*
* This class is used as DTO during communication with IDE clients via
* {@link com.teamscale.ide.commons.client.IIdeServiceClient}, special care has to be taken when
* changing its signature!
*/
@ExportToTypeScript
@IndexValueClass(containedInBackup = true)
public enum ELanguage {
/** Java */
JAVA("Java", true, "java"),
/** C++ */
CPP("C++", true, "cpp", "hpp", "cxx", "hxx", "cc", "hh", "h", "inc", "inl"),
/** C */
C("C", true, "c", "h", "inc"),
/** OpenCL C/C++ */
OPEN_CL("OpenCL C/C++", true, "cl"),
/** Rust */
RUST("Rust", true, "rs"),
/** Visual Basic */
VB("Visual Basic", false, "vb", "frm", "cls", "bas"),
/** COBOL */
COBOL("COBOL", false, "cbl", "cob", "cobol", "cpy", "eco"),
/** C# */
CS("C#", true, "cs"),
/** ABAP */
ABAP("ABAP", false, "abap"),
/**
* ABAP Dictionary. This is a textual representation of objects in the ABAP Dictionary (aka DDIC).
* The format is based on the textual format for structure and database table definition which is
* available starting with SAP ABAP Platform v7.51 or v7.52 respectively (see the SAP
* documentation for details).
* This format is again based on the SAP CDS
* DDL. If available, we export ABAP DDIC objects in the textual representation as provided by
* the SAP system, otherwise our Teamscale Connector for the SAP ABAP Platform serializes the text
* in a similar format.
*
* This language is not selectable in analysis profiles because it does not make sense to use this
* language in a non-Abap project. Technically the reason is that the language is not listed in
* IndexBasedAnalysisConstants#SUPPORTED_LANGUAGES. Even if the language is not part of the analysis
* profile, **.abap_ddic files will be parsed/analyzed with the correct language.
*
* If we write a check for ABAP_DDIC files, then the check must analyze ELanguage.ABAP, too.
* Otherwise, it won't be selectable in profiles.
*/
ABAP_DDIC("ABAP Dictionary", false, "abap_ddic"),
/** Ada */
ADA("Ada", false, "ada", "ads", "adb"),
/** Natural language text */
TEXT("Plain Text", false, "txt"),
/** XML */
XML("XML", true, "xml", "xsl", "xslt", "architecture", "cqb", "csproj", "config", "prj"),
/** HANA SQLScript */
SQLSCRIPT("HANA SQLScript", false, "sql", "hdbprocedure", "hdbfunction", "hdbscalarfunction", "hdbtablefunction"),
/**
* HANA Views XML. This includes view specifications of the SAP HANA database which are stored in
* XML. Does NOT include HANA code which is not XML (e.g. *.hdbview)
*/
HANA_VIEW("HANA View", true, "analyticview", "attributeview", "calculationview"),
/** PL/SQL */
PLSQL("PL/SQL", false, "sql", "pks", "pkb", "trg", "fnc", "typ", "tyb", "prc", "plsql"),
/** Python */
PYTHON("Python", true, "py"),
/** T-SQL aka Transact SQL. */
TSQL("Transact-SQL", false, "tsql", "sql"),
/** Matlab */
MATLAB("Matlab", true, "m"),
/** PHP */
PHP("PHP", true, "php", "php3", "php4", "php5"),
/**
* JavaScript including EcmaScript and TypeScript.
*
* Note that the statement oracle only works if semicolons are used consistently. Semicolons are
* optional in JavaScript (automatic semicolon insertion), but determining the end of a statement
* without them requires a full-blown parser (hard to decide locally in some cases). As most coding
* guidelines recommend using semicolons anyway, we stick with this solution.
*/
JAVASCRIPT("JavaScript/TypeScript", true, "js", "sj", "jsx", "ts", "tsx"),
/**
* Use this for languages for which no dedicated scanner is available. Creates a token per line (and
* creates EOL tokens).
*/
LINE("Line-based Text", false, "yaml", "yml", "json"),
/** Delphi */
DELPHI("Delphi", false, "pas", "dpr"),
/**
* IEC 61131-3 Structured Text. We understand both the code (PU) and variable structure (SV).
*
* - .pou files contain program code (written in ST language)
* - .dt files contain type declarations (written in ST language)
*
* Sadly, the file endings and format are not standardized in IEC-61131-3.
*/
IEC61131("IEC 61131-3 ST", true, "pu", "sv", "st", "scl", "pou", "dt", "var", "tu"),
/** Fortran */
FORTRAN("Fortran", false, "f", "for", "f77", "f90", "f95", "f03"),
/** Xtend */
XTEND("Xtend", true, "xtend"),
/** Swift */
SWIFT("Swift", true, "swift"),
/** OCaml */
OCAML("OCaml", true, "ml", "mli"),
/** Opentext Oscript */
OSCRIPT("OScript", true, "osx", "lxe", "os"),
/** Groovy */
GROOVY("Groovy", true, "groovy"),
/** Natural language requirements */
NL_REQUIREMENTS("Natural Language Requirements", false),
/** Natural language tests */
NL_TESTS("Natural Language Tests", false, "nltests"),
/** Natural language issues */
NL_ISSUES("Natural Language Issues", false),
/** Simulink and Stateflow. */
SIMULINK("Simulink and Stateflow", false, "mdl", "slx", "sldd"),
/** Gosu (ref). */
GOSU("Gosu", true, "gsp", "gs", "gsx", "gr", "grs"),
/** Kotlin. */
KOTLIN("Kotlin", true, "kt", "kts", "ktm"),
/** Objective-C. */
OBJECTIVE_C("Objective-C", true, "m", "h", "c"),
/** Objective-C++. */
OBJECTIVE_CPP("Objective-C++", true, "mm", "h", "inc", "inl", "cpp", "hpp"),
/** JavaDoc */
JAVADOC("JavaDoc", true),
/**
* AbapDoc, syntax inside documentation comments in abap. AbapDoc comments start with "!". This
* language does not have a file extension since it is always embedded in {@link ELanguage#ABAP}
* files. We also don't have a parser (only a scanner that is executed in custom checks).
*/
ABAPDOC("Abap Doc", true),
/** Go */
GO("Go", true, "go"),
/** Files in the "Core Data Services Definition Language" from SAP. */
ABAP_CDS("ABAP CDS", false, /* Teamscale file ending for CDS */ "abap_cds",
/* Teamscale file ending for CDS meta data */ "abap_ddlx", /* AbapGit file ending for CDS */ "asddls",
/* AbapGit file ending for CDS meta data */ "asddlxs",
/* AbapGit file ending for CDS and CDS meta data */ "acds"),
/** PowerShell */
POWERSHELL("Powershell", false, "ps1", "psd1", "psm1"),
/** Extended SQL (ESQL) */
ESQL("Extended SQL (ESQL)", false, "esql"),
/**
* Used for Kubernetes resources, such as static manifests, Helm charts and Kustomizations.
*/
KUBERNETES("Kubernetes", false, "yaml", "json", "yml"),
/** Dart */
DART("Dart", true, "dart");
/** Languages that do not have methods; consulted by {@link #languageHasMethods(ELanguage)}. */
private static final EnumSet<ELanguage> LANGUAGES_WITHOUT_METHODS = EnumSet.of(ABAP_DDIC, LINE, TEXT, XML,
		HANA_VIEW);
/**
 * List of languages that support preprocessor expansions (macros). Used by the frontend code.
 *
 * {@link ELanguage#ABAP} and {@link ELanguage#IEC61131} also have preprocessors, but these don't
 * generate {@link eu.cqse.check.framework.preprocessor.c.PreprocessorTokenReplacement}s that we
 * want to show in the UI.
 *
 * @see eu.cqse.check.framework.preprocessor.PreprocessorUtils#LANGUAGES_WITH_PREPROCESSOR
 */
@SuppressWarnings("unused") // used by frontend code only
private static final EnumSet<ELanguage> LANGUAGES_WITH_PREPROCESSOR_EXPANSIONS = EnumSet.of(C, CPP,
		OBJECTIVE_C, OBJECTIVE_CPP, SWIFT);
/**
 * List of languages that should be hidden from the UI. Languages should be hidden if the
 * corresponding features (parsers, checks...) are either experimental or outdated but needed for
 * legacy support or compatibility reasons. Showing hidden languages in the UI can be activated by
 * enabling the feature toggle {@link EFeatureToggle#SHOW_HIDDEN_LANGUAGES}. See also TS-31076.
 */
public static final Set<ELanguage> HIDDEN_LANGUAGES = EnumSet.of(RUST, KUBERNETES);
/**
* This maps from extensions to languages that typically have files with these extensions. The
* values of this map are {@link EnumSet}s, not {@link Set}s since {@link EnumSet} has a stable
* traversal order when called via {@link Set#stream()}. That is important for keeping our analysis
* results stable.
*/
private static final Map> EXTENSION_2_LANGUAGE_MAP = new HashMap<>();
// Initialize {@link #EXTENSION_2_LANGUAGE_MAP}.
static {
for (ELanguage language : values()) {
for (String extension : language.extensions) {
EnumSet languagesForExtension = EXTENSION_2_LANGUAGE_MAP
.computeIfAbsent(extension.toLowerCase(), ext -> EnumSet.noneOf(ELanguage.class));
languagesForExtension.add(language);
}
}
}
/** Human-readable language name, intended e.g. for display in a UI. */
private final String readableName;

/** Flag telling whether the language is case-sensitive. */
private final boolean caseSensitive;

/** The file extensions that are commonly used for files of this language. */
private final String[] extensions;

/**
 * Creates a language constant.
 *
 * @param readableName
 *            the name returned by {@link #getReadableName()}
 * @param caseSensitive
 *            the value returned by {@link #isCaseSensitive()}
 * @param extensions
 *            the file extensions registered for this language (may be empty)
 */
ELanguage(String readableName, boolean caseSensitive, String... extensions) {
	this.extensions = extensions;
	this.caseSensitive = caseSensitive;
	this.readableName = readableName;
}
/** @return the human-readable name of this language (see {@link #readableName}). */
public String getReadableName() {
	return this.readableName;
}
/** @return {@code true} if this language is case-sensitive, {@code false} otherwise. */
public boolean isCaseSensitive() {
	return this.caseSensitive;
}
/**
 * Returns the file extensions commonly used for this language. The array is passed through
 * {@link CollectionUtils#copyArray} before it is handed out.
 */
public String[] getFileExtensions() {
	String[] extensionsCopy = CollectionUtils.copyArray(this.extensions);
	return extensionsCopy;
}
/**
 * Gets the {@link ELanguage} value corresponding to the extension of the resource. Returns
 * {@link ELanguage#LINE} (never null) if no language is registered for the extension, which
 * includes the case of a null extension. If there are multiple possible languages, the first one
 * (with a stable ordering) is returned.
 */
public static ELanguage fromResource(Resource resource) {
	Set<ELanguage> candidates = getAllLanguagesForExtension(resource.getExtension());
	return candidates.stream().findFirst().orElse(LINE);
}
/**
 * Gets the {@link ELanguage} value corresponding to the file extension of the path. Delegates to
 * {@link #fromFile(File)} and therefore returns {@link ELanguage#LINE} (never null) if no language
 * is found for the extension. If there are multiple possible languages, the first one is returned.
 * This method should only be used for test code or as a fallback since file extensions may match
 * multiple {@link ELanguage}s.
 *
 * @deprecated This method should not be used in production because it is usually not possible to
 *             determine the {@link ELanguage} of a file based on its uniform path. We allow
 *             fine-grained configuration of the language mapping (project-connector option) and
 *             spend a lot of effort on finding the right language for a file based on file name,
 *             languages configured in the analysis profile, and file content. Also, there are some
 *             languages which by default share file extensions (e.g., CPP and Objective-C, or
 *             Objective-C and Matlab). To determine the {@link ELanguage} of a file, use its
 *             {@link com.teamscale.index.resource.TokenElementInfo TokenElementInfo}, that is the
 *             only "correct" source.
 */
@Deprecated
public static ELanguage fromPath(String path) {
	File file = new File(path);
	return fromFile(file);
}
/**
 * Determines the {@link ELanguage} from the file extension of the given file. If no language is
 * found for the extension, {@link ELanguage#LINE} is returned. Since a file extension may match
 * several {@link ELanguage}s, this method should only be used in test code or as a fallback.
 *
 * When several languages match, the candidates are sorted and the first one is returned, so the
 * same language is chosen on every call (stable result).
 *
 * @deprecated See {@link #fromPath(String)}
 */
@Deprecated
public static ELanguage fromFile(File file) {
	String extension = FileSystemUtils.getFileExtension(file);
	Set<ELanguage> candidates = getAllLanguagesForExtension(extension);
	return candidates.stream().sorted().findFirst().orElse(LINE);
}
/**
 * Returns all languages for the given file extension. The lookup ignores the case of the extension
 * and is independent of the JVM's default locale. This returns an empty set if the extension is
 * null or no language is registered for it. The returned set is a fresh copy and may be modified by
 * the caller.
 *
 * Although the declared type is {@link Set}, the returned instance is always an {@link EnumSet},
 * which has a stable iteration order. This is important if we (for example) select the first
 * language and want to achieve stable, repeatable analysis results.
 */
public static Set<ELanguage> getAllLanguagesForExtension(String extension) {
	if (extension == null) {
		return EnumSet.noneOf(ELanguage.class);
	}
	// Locale.ROOT: with the default locale, e.g. a Turkish JVM would lower-case "INC" to "ınc"
	// (dotless i) and miss the registered "inc" key.
	EnumSet<ELanguage> languages = EXTENSION_2_LANGUAGE_MAP.get(extension.toLowerCase(Locale.ROOT));
	if (languages == null) {
		return EnumSet.noneOf(ELanguage.class);
	}
	// Copy so that callers cannot modify the sets stored in the shared map.
	return EnumSet.copyOf(languages);
}
/**
 * Returns all {@link ELanguage}s matching the file extension of the given file path. The extension
 * is extracted via {@link FileSystemUtils#getFileExtension} and then looked up with
 * {@link #getAllLanguagesForExtension(String)}.
 */
public static Set<ELanguage> getAllLanguagesForPath(String path) {
	return getAllLanguagesForExtension(FileSystemUtils.getFileExtension(path));
}
/**
 * @return {@code false} if the given language is listed in {@link #LANGUAGES_WITHOUT_METHODS},
 *         {@code true} otherwise, i.e. the language has the concept of methods.
 */
public static boolean languageHasMethods(ELanguage language) {
	boolean listedAsMethodless = LANGUAGES_WITHOUT_METHODS.contains(language);
	return !listedAsMethodless;
}
/**
 * Checks whether this language is {@link #OBJECTIVE_C} or {@link #OBJECTIVE_CPP}, i.e. whether it
 * is contained in {@code LanguageGroups.OBJECTIVE_C_AND_OBJECTIVE_CPP}.
 */
public boolean isObjectiveCOrObjectiveCpp() {
	boolean inGroup = LanguageGroups.OBJECTIVE_C_AND_OBJECTIVE_CPP.contains(this);
	return inGroup;
}
/**
 * Checks whether this language is {@link #C} or {@link #CPP}, i.e. whether it is contained in
 * {@code LanguageGroups.C_AND_CPP}.
 */
public boolean isCppOrC() {
	boolean inGroup = LanguageGroups.C_AND_CPP.contains(this);
	return inGroup;
}
/**
 * Checks whether this language is {@link #CPP} or {@link #OBJECTIVE_CPP}, i.e. whether it is
 * contained in {@code LanguageGroups.CPP_AND_OBJECTIVE_CPP}.
 */
public boolean isCppOrObjectiveCpp() {
	boolean inGroup = LanguageGroups.CPP_AND_OBJECTIVE_CPP.contains(this);
	return inGroup;
}
/**
 * Returns the languages that should be shown in the UI. With the feature toggle
 * {@link EFeatureToggle#SHOW_HIDDEN_LANGUAGES} enabled, every language is returned; otherwise the
 * languages contained in {@link #HIDDEN_LANGUAGES} are left out of the returned array.
 */
public static ELanguage[] getUserVisibleLanguages() {
	if (EFeatureToggle.SHOW_HIDDEN_LANGUAGES.isEnabled()) {
		return values();
	}
	// Toggle is off: drop everything that is marked as hidden.
	return Arrays.stream(values()).filter(candidate -> !HIDDEN_LANGUAGES.contains(candidate))
			.toArray(ELanguage[]::new);
}
}