All downloads are free. The search and download functionality uses the official Maven repository.

com.google.common.css.compiler.ast.CssStringNode Maven / Gradle / Ivy

Go to download

Closure Stylesheets is an extension to CSS that adds variables, functions, conditionals, and mixins to standard CSS. The tool also supports minification, linting, RTL flipping, and CSS class renaming.

The newest version!
/*
 * Copyright 2011 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.css.compiler.ast;

import com.google.common.base.CharMatcher;
import com.google.common.base.Function;
import com.google.common.css.SourceCodeLocation;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Node corresponding to a string value.
 * 

* TODO(user): refactor the encoding/decoding logic out of this * class, thereby restoring the architectural integrity of the CssNode * type hierarchy; all these classes should be value types with * no elaborate behaviors. *

* This node represents both a CSS fragment and the abstract value * represented by that concrete syntax. For example, the following * three declarations mean precisely the same thing in CSS: * {@code * .warning { content-before: 'warning:'; } * .warning { content-before: "warning:"; } * .warning { content-before: 'war\06e ing:"; } * } *

* Some clients care about concrete CSS syntax. For high-fidelity * roundtrip CSS processing it is necessary to preserve the original * author's choice of quote character. On the other hand, some clients * care about abstract values. For purposes of machine translation or * typeface resolution, we are uninterested in the differences that * distinguish the cases shown above; in these applications we would * like to deal in terms of the simple Java String {@code warning:}. *

* Java's {@code Character} and {@code String} classes represent * values in the UTF-16 encoding; some codepoints are represented by * surrogate pairs of {@code Character} instances. CSS escape sequences are * designed without a particular encoding in mind; there are CSS * escape sequences that correspond to a single Unicode character * and multiple Java {@code Character} instances. *

* Java's {@code Character} repertoire is a strict subset of the * codepoints that can be represented in CSS. When decoding CSS * escape sequences, this class substitutes the Unicode replacement * character for characters that cannot be represented in Java * {@code Strings}, as permitted by * http://www.w3.org/TR/CSS2/syndata.html#characters * */ public class CssStringNode extends CssValueNode { private static final String LINE_BREAK_PATTERN_STRING = "(?:\\n|\\r\\n|\\r|\\f)"; private static final CharMatcher CONSUMABLE_WHITESPACE = CharMatcher.anyOf("\n\t "); private static final Pattern ESCAPE_CHAR_STRING_CONTINUATION_PATTERN = Pattern.compile("\\\\" + LINE_BREAK_PATTERN_STRING); private static final Pattern ESCAPE_CHAR_NOT_SPECIAL = Pattern.compile("\\\\([^0-9a-fA-F\\n\\r\\f])"); private static final Pattern ESCAPE_CHAR_HARD_TO_TYPE = Pattern.compile("\\\\([0-9a-fA-F]{1,6})(\\r\\n|[ \\t\\r\\n\\f])?"); /** The pattern for HTML markup special characters */ private static final Pattern HTML_PATTERN = Pattern.compile("[<>\"&']"); private static final Pattern LINE_BREAK_PATTERN = Pattern.compile(LINE_BREAK_PATTERN_STRING); private static final Pattern HEX_PATTERN = Pattern.compile("[0-9a-fA-F]+"); private static final Pattern WIDE_NONASCII_PATTERN = Pattern.compile("\\P{ASCII}"); private final Type type; /** * The characters between the quotes in concrete CSS syntax. Some * clients will want exact control and high fidelity for values in * these nodes. Some will expect an AST to disregard unimportant * detail and provide convenient access to a normalized * representation of the stylesheet. This field stores a verbatim * snippet of CSS corresponding to this node. */ private String concreteValue; /** * Constructor of a string node. * * @param type CSS provides multiple syntax alternatives for strings; * which was used for this term? 
* @param value the Java String representation of this string (not its * concrete CSS syntax) * @param location The location in source code corresponding to this node */ public CssStringNode(Type type, SourceCodeLocation location) { super("", location); setConcreteValue(location.getSourceCode().getFileContents() .substring(location.getBeginCharacterIndex() // for the quote + 1, // we end on the quote, so no need to adjust location.getEndCharacterIndex())); this.type = type; } /** * Constructor of a string node. * * @param type CSS provides multiple syntax alternatives for strings; * which was used for this term? * @param value the Java String representation of this string (not its * concrete CSS syntax) */ public CssStringNode(Type type, String value) { super(value, /* location */ null); this.type = type; setValue(value); } /** * Copy constructor. */ public CssStringNode(CssStringNode node) { super(node); type = node.type; this.concreteValue = node.getConcreteValue(); } public Type getType() { return type; } /** * Specifies the characters that should appear between the quotes * when this node is written as CSS, and updates this node's value * accordingly. *

* For example, the Java method invocation: {@code * n.setConcreteValue("hi\\\""); * } * could result in the CSS: {@code * p { content-after: "hi\""; } * } or perhaps {@code * p { content-after: 'hi\"'; } * }, depending on the {@code CssStringNode.Type} of {@code n} and * the {@code CssTree} in which it occurs, but it would never * result in {@code * p { content-after: "hi\000022"; } * } */ public String setConcreteValue(String concreteValue) { this.concreteValue = concreteValue; super.setValue(unescape(concreteValue)); return concreteValue; } /** * Retrieves the characters that should appear between the quotes * when this node is written in concrete CSS syntax. */ public String getConcreteValue() { return concreteValue; } /** * Establishes a value for this node by conservatively escaping * {@code value} and delegating to {@link #setConcreteValue} to * maintain consistency between the {@link #value} and the * {@link #concreteValue}. *

* This function stores a normalized representation of the given * {@code value}; if you want to work in more exact terms, try * {@link setConcreteValue}. *

* For example, the Java snippet: {@code * n.setValue("Senator's Response") * } could result in the CSS snippet: {@code * p { content-before: "Senator's Response"; } * } * or {@code * p { content-before: 'Senator\'s Response'; } * } * or {@code * p { content-before: 'Senator\27 s Response'; } * }, depending on the {@code CssStringNode.Type} of {@code n} and * the {@code CssTree} in which it occurs and the choice of the * {@code CssTreeVisitor} that renders the output. *

* Note that the {@code value} parameter here will normally not * begin or end with a quotation mark. */ @Override public void setValue(String value) { setConcreteValue(escape(type, HTML_ESCAPER, value)); } @Override public CssStringNode deepCopy() { return new CssStringNode(this); } @Override public String toString() { return type.toString(getValue(), SHORT_ESCAPER); } /** * Determines the canonical Java String representation of a value encoded * in CSS syntax. * * @param escaped whatever lies between the quotes (excluding the quotes * themselves). */ public static String unescape(String escaped) { String result = ESCAPE_CHAR_STRING_CONTINUATION_PATTERN.matcher(escaped).replaceAll(""); result = ESCAPE_CHAR_NOT_SPECIAL.matcher(result).replaceAll("$1"); Matcher unicode = ESCAPE_CHAR_HARD_TO_TYPE.matcher(result); StringBuffer sb = new StringBuffer(); while (unicode.find()) { // CSS allows us to substitute characters above 0x110000. Java // requires us to stay at or below MAX_CODE_POINT. If we are // allowed to substitute, and Java requires us to substitute, // then we substitute. Otherwise: (a) everything's fine without // substitution or (b) CSS does not permit a substitution we // need to make for Java's happiness or (c) CSS allows a // substitution but we don't need it. For (a) and (c) we don't // substitute and that's fine. For (b) we don't substitute, // probably that will produce an exception below, and then we'll // know it's worth thinking about that case some more. int codepoint = Integer.parseInt(unicode.group(1), 16); if (codepoint > 0x10FFFF && codepoint > Character.MAX_CODE_POINT) { // CSS allows us to substitute, and Java requires us not to use the // character we were given, so here is a character specifically // for replacements: codepoint = 0xfffd; // TODO(user): this would be a good spot for a warning. } String replacement = codepoint == 0 ? 
"" : new String(Character.toChars(codepoint)); unicode.appendReplacement(sb, replacement); } unicode.appendTail(sb); result = sb.toString(); return result; } private static String escapeLineBreaks(String input) { Matcher linebreak = LINE_BREAK_PATTERN.matcher(input); StringBuilder sb = new StringBuilder(input.length()); int left = 0; while (linebreak.find()) { if (linebreak.start() > left) { sb.append(input.subSequence(left, linebreak.start())); } sb.append("\\00000a"); int right = linebreak.end(); if (right < input.length()) { char c = input.charAt(right); if (CONSUMABLE_WHITESPACE.matches(c)) { // add sacrificial whitespace to preserve the original sb.append(" "); } } left = right; } if (left < input.length()) { sb.append(input.subSequence(left, input.length())); } return sb.toString(); } /** * Encodes a CSS term denoting {@code raw}. In general, there are multiple * representations in CSS of the same value; we allow clients to influence * this choice through {@code discretionaryEscaper}. * * @see #HTML_ESCAPER * @see #SHORT_ESCAPER */ public static String escape( Type type, Function discretionaryEscaper, String raw) { String result = raw.replaceAll( // the Java String encoding of the regex encoding of a slash "\\\\", // the Java String encoding of a regex replacement encoding of a // CSS-escaped slash "\\\\\\\\"); result = escapeLineBreaks(result); result = discretionaryEscaper.apply(result); result = type.escapeForDelimiters(result); return result; } /** * Represents this node's value in CSS syntax that is also safe for * inclusion in HTML attribute values and element contents. This is a * good choice when you want defense in depth against client code that * fails to escape things properly. 
*/ public static final Function HTML_ESCAPER = new Function() { public String apply(String input) { return paranoidEscapeChars(WIDE_NONASCII_PATTERN, paranoidEscapeChars( HTML_PATTERN, input)); } }; /** * Replaces characters of questionable safety in {@code context} by * CSS escape sequences that are safe for DOUBLE_QUOTED_STRING and * SINGLE_QUOTED_STRING nodes and also in HTML attribute values and * element content. This implementation's code is especially simple * in hopes of improving safety. * * @param banned a {@code Pattern} matching strings of length one * that should be escaped in the output. * @param context a {@code String} input potentially containing * codepoints that are {@code banned}. */ private static String paranoidEscapeChars(Pattern banned, String context) { StringBuffer sb = new StringBuffer(); Matcher markup = banned.matcher(context); while (markup.find()) { String match = markup.group(0); assert( // We don't insert characters from whole cloth match.length() > 0 // Our replacement accounts for the entire banned snippet, // which is one codepoint but potentially multiple UTF-16 // Java Characters. && match.length() == match.offsetByCodePoints(0, 1)); markup.appendReplacement( sb, String.format("\\\\%06x", markup.group(0).codePointAt(0))); if (markup.end() < context.length() && CONSUMABLE_WHITESPACE.matches(context.charAt(markup.end()))) { // a whitespace after an escaped character requires the // insertion of an additional whitespace between the // escape sequence and the original whitespace. sb.append(" "); } } markup.appendTail(sb); return sb.toString(); } /** * Replaces characters that have no literal representation in CSS with * their escape codes. This implementation compromises computational * efficiency in order to produce the shortest possible output for each * replaced character. This is a good choice for readability. 
*/ public static final Function SHORT_ESCAPER = new Function() { public String apply(String input) { StringBuffer sb = new StringBuffer(); Matcher m = WIDE_NONASCII_PATTERN.matcher(input); while (m.find()) { String match = m.group(0); assert( // We don't insert characters from whole cloth match.length() > 0 // Our replacement accounts for the entire banned snippet, // which is one codepoint but potentially multiple UTF-16 // Java Characters. && match.length() == match.offsetByCodePoints(0, 1)); /* Escape codes can have up to 6 digits. We are allowed to pad * with 0s on the left. * When the escaped character ends the string, we simply * substitute the escape code for the escaped character. * Otherwise, there are two cases in which we must insert a * whitespace after our escape sequence: * (1) We have fewer than 6 digits and the escaped character * appears immediately before a hexadecimal digit in the * input. * (2) The escaped character appears immediately before a * whitespace in the input Adding the space never results in * longer CSS than adding zero padding, and sometimes it * shortens our output, so we never pad with zeroes. */ String hexDigits = String.format("%x", match.codePointAt(0)); String trailer; if (input.length() <= m.end()) { // simple: the end of the escape sequence is the end of the string. trailer = ""; } else if (hexDigits.length() < 6 && HEX_PATTERN.matcher( input.subSequence(m.end(), m.end() + 1)).matches()) { // a hex digit after a short escape sequence requires // separation by an inserted whitespace. trailer = " "; } else if (CONSUMABLE_WHITESPACE.matches(input.charAt(m.end()))) { // a whitespace after an escaped character requires the // insertion of an additional whitespace between the // escape sequence and the original whitespace. 
trailer = " "; } else { trailer = ""; } m.appendReplacement( sb, String.format("\\\\%s%s", hexDigits, trailer)); } m.appendTail(sb); return sb.toString(); } }; /** * Generates a CSS snippet representing this node. * This may differ in semantically unimportant ways from the snippet * from which this node was originally parsed. *

* You might reasonably {@code n.setConcreteValue(n.toString(ESC))} * because that will not change what you get from {@code n.getValue()}. * But it is probably an error to write * {@code n.setValue(n.toString(ESC))}; you can pump the string * to unbounded length by putting the latter snippet in the body * of a loop. * * @return a {@code String} corresponding to this node's * abstract value, but suitable for inclusion in CSS. * @see #getValue * @see #getConcreteValue */ public String toString( Function discretionaryEscaper) { return this.type.toString(this.getValue(), discretionaryEscaper); } /** * CSS syntax permits strings to be expressed either using * single-quotes or double-quotes. */ public enum Type { /* double-quoted string */ DOUBLE_QUOTED_STRING("\""), /* single-quoted string */ SINGLE_QUOTED_STRING("'"); public final String delimiter; public final String format; Type(String delimiter) { this.delimiter = delimiter; this.format = String.format("%s%%s%s", delimiter, delimiter); } public String toString( String value, Function discretionaryEscaper) { return String.format(format, escape(this, discretionaryEscaper, value)); } /** * Escape delimiters found in input so that they will not begin * or end new lexemes. */ public String escapeForDelimiters(String input) { return input.replaceAll( delimiter, // Java String literal encoding of a regex-replacement encoding of // a slash used to cancel the meaning of the special CSS character // (the delimiter) that follows. "\\\\" + delimiter); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy