All Downloads are FREE. Search and download functionality uses the official Maven repository.

dev.amp.validator.AMPHtmlHandler Maven / Gradle / Ivy

There is a newer version: 1.0.42
Show newest version
/*
 *
 * ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *  ====================================================================
 */

/*
 * Changes to the original project are Copyright 2019, Yahoo Inc..
 */

package dev.amp.validator;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import dev.amp.validator.css.CssValidationException;
import dev.amp.validator.exception.ExitOnFirstErrorException;
import dev.amp.validator.exception.MaxParseNodesException;
import dev.amp.validator.exception.TagValidationException;
import dev.amp.validator.exception.ValidatorException;
import dev.amp.validator.utils.TagSpecUtils;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

/**
 * Validation handler which accepts callbacks from the HTML parser.
 *
 * <p>One handler instance accumulates the state of a single validation run: the number of
 * start tags seen, the tag most recently opened, the character data buffered for that tag and
 * the {@link ValidatorProtos.ValidationResult.Builder} that collects errors and warnings.
 *
 * @author nhant01
 * @author GeorgeLuo
 */
public class AMPHtmlHandler extends DefaultHandler {
    /**
     * Creates an AMPHtmlHandler.
     *
     * @param validatorManager the validator manager instance.
     * @param htmlFormat       HtmlFormat code.
     * @param condition        exit condition.
     * @param maxNodesAllowed  max nodes allowed; the limit is only enforced when greater than zero.
     * @param docByteSize      length of html document.
     */
    public AMPHtmlHandler(@Nonnull final AMPValidatorManager validatorManager,
                          @Nonnull final ValidatorProtos.HtmlFormat.Code htmlFormat, @Nonnull final ExitCondition condition,
                          final int maxNodesAllowed, final int docByteSize) {
        this.validatorManager = validatorManager;
        this.exitCondition = condition;
        this.maxNodesAllowed = maxNodesAllowed;
        this.htmlFormat = htmlFormat;
        this.validationResult = ValidatorProtos.ValidationResult.newBuilder();
        this.context = new Context(new ParsedValidatorRules(htmlFormat, validatorManager), docByteSize);
    }

    /**
     * Processing the beginning of the document. Marks the result status as UNKNOWN until the
     * document has been fully processed.
     *
     * @exception SAXException Any SAX exception.
     */
    @Override
    public void startDocument() throws SAXException {
        validationResult.setStatus(ValidatorProtos.ValidationResult.Status.UNKNOWN);
    }

    /**
     * Processing the end of the document. Emits the global tag validation errors and then
     * derives the final PASS/FAIL status from the collected errors.
     *
     * @exception SAXException Any SAX exception.
     */
    @Override
    public void endDocument() throws SAXException {
        try {
            context.getRules().maybeEmitGlobalTagValidationErrors(context, validationResult);
            setValidationResultStatus();
        } catch (TagValidationException ignored) {
            // Best effort: the final status is not computed in this case, so the result keeps
            // whatever status it had before this call.
            // NOTE(review): confirm callers expect a non-final status on this path.
        }
    }

    /**
     * Processing the start of an element.
     *
     * @param uri The Namespace URI, or the empty string if the
     *        element has no Namespace URI or if Namespace
     *        processing is not being performed.
     * @param localName The local name (without prefix), or the
     *        empty string if Namespace processing is not being
     *        performed.
     * @param qName The qualified name (with prefix), or the
     *        empty string if qualified names are not available.
     * @param attributes The attributes attached to the element.  If
     *        there are no attributes, it shall be an empty
     *        Attributes object.
     * @exception SAXException Any SAX exception
     */
    @Override
    public void startElement(final String uri, final String localName,
                             final String qName, final Attributes attributes) throws SAXException {
        // NOTE(review): the check runs before the increment and uses '>', so the exception is
        // raised on the second element past the limit. Confirm that is the intended boundary.
        if (this.maxNodesAllowed > 0 && this.totalNodes > this.maxNodesAllowed) {
            throw new MaxParseNodesException();
        }

        this.totalNodes++;

        // NOTE(review): characters buffered for a previously opened, still unclosed tag are not
        // reset here; confirm that carrying them over to this tag is intended.
        this.encounteredTag = new ParsedHtmlTag(localName, attributes);
        if ("HTML".equals(encounteredTag.upperName())) {
            this.context.getRules().validateHtmlTag(
                    encounteredTag, this.context, this.validationResult);
        }

        // TODO: discuss how to handle this warning (Attributes class dedupes)
        final String maybeDuplicateAttrName = encounteredTag.hasDuplicateAttrs();
        if (maybeDuplicateAttrName != null) {
            final List<String> params = new ArrayList<>();
            params.add(encounteredTag.lowerName());
            params.add(maybeDuplicateAttrName);
            this.context.addWarning(
                    ValidatorProtos.ValidationError.Code.DUPLICATE_ATTRIBUTE,
                    this.context.getLineCol(),
                    params,
                    /* specUrl */ "",
                    this.validationResult);
            //TODO - tagchowder doesn't seem to maintain duplicate attributes.
            //encounteredTag.dedupeAttrs();
        }

        // Missing-extension errors accumulated so far are emitted when BODY opens.
        if ("BODY".equals(encounteredTag.upperName())) {
            this.emitMissingExtensionErrors();
        }

        try {
            // Placeholder result used when the parent tag has no reference point matcher.
            ValidateTagResult resultForReferencePoint =
                    new ValidateTagResult(ValidatorProtos.ValidationResult.newBuilder(), null);
            resultForReferencePoint.getValidationResult().setStatus(ValidatorProtos.ValidationResult.Status.UNKNOWN);

            final ReferencePointMatcher referencePointMatcher = context.getTagStack().parentReferencePointMatcher();
            if (referencePointMatcher != null) {
                resultForReferencePoint = referencePointMatcher.validateTag(encounteredTag, context);
            }

            final ValidateTagResult resultForTag =
                    TagSpecUtils.validateTag(context, encounteredTag, resultForReferencePoint.getBestMatchTagSpec());
            // The reference point result is only merged when the tag itself passed validation.
            if (referencePointMatcher != null
                    && (resultForTag.getValidationResult().getStatus() == ValidatorProtos.ValidationResult.Status.PASS)) {
                this.validationResult.mergeFrom(resultForReferencePoint.getValidationResult().build());
            }

            checkForReferencePointCollision(
                    resultForReferencePoint.getBestMatchTagSpec(),
                    resultForTag.getBestMatchTagSpec(),
                    resultForTag.getValidationResult());

            this.validationResult.mergeFrom(resultForTag.getValidationResult().build());

            this.context.updateFromTagResults(encounteredTag, resultForReferencePoint, resultForTag);

            if (this.validationResult.getStatus() == ValidatorProtos.ValidationResult.Status.FAIL
                    && exitCondition == ExitCondition.EXIT_ON_FIRST_ERROR) {
                throw new ExitOnFirstErrorException();
            }
        } catch (TagValidationException | ValidatorException | IOException | CssValidationException ignored) {
            // Best effort: a failure while validating one tag must not abort the parse of the
            // remaining document, so validation simply continues with the next event.
        }
    }

    /**
     * Processing the end of an element. Validates the character data buffered for the most
     * recently opened tag (if any), pops the tag stack and resets the per-tag state.
     *
     * @param uri The Namespace URI, or the empty string if the
     *        element has no Namespace URI or if Namespace
     *        processing is not being performed.
     * @param localName The local name (without prefix), or the
     *        empty string if Namespace processing is not being performed.
     * @param qName The qualified name (with prefix), or the
     *        empty string if qualified names are not available.
     */
    @Override
    public void endElement(final String uri, final String localName, final String qName) {
        if (encounteredTag != null) {
            try {
                if (charactersBuilder != null) {
                    cdata(charactersBuilder.toString());
                }
            } catch (TagValidationException | CssValidationException | IOException ignored) {
                // Best effort: a failure while validating the character data must not prevent
                // the tag cleanup and the tag stack update below.
            }
            encounteredTag.cleanup();
        }
        try {
            this.context.getTagStack().exitTag(this.context, this.validationResult);
        } catch (TagValidationException ignored) {
            // Best effort: the per-tag state below is reset even when leaving the tag fails.
        }

        charactersBuilder = null;
        encounteredTag = null;
    }

    /**
     * Receive a Locator object for document events.
     *
     * @param locator A locator for all SAX document events.
     */
    @Override
    public void setDocumentLocator(@Nonnull final Locator locator) {
        this.context.setLineCol(locator);
    }

    /**
     * Returns the validation result.
     *
     * @return the builder holding the errors, warnings and status collected by this handler.
     */
    public ValidatorProtos.ValidationResult.Builder validationResult() {
        return validationResult;
    }

    /**
     * While parsing the document HEAD, we may accumulate errors which depend
     * on seeing later extension script tags. This adds every such pending error to the
     * validation result.
     */
    public void emitMissingExtensionErrors() {
        final ExtensionsContext extensionsCtx = this.context.getExtensions();
        for (ValidatorProtos.ValidationError error : extensionsCtx.missingExtensionErrors()) {
            this.context.addBuiltError(error, this.validationResult);
        }
    }

    /**
     * Considering that reference points could be defined by both reference
     * points and regular tag specs, check that we don't have matchers assigned
     * from both, there can be only one. A TAG_REFERENCE_POINT_CONFLICT error is added
     * when both specs carry reference points.
     *
     * @param refPointSpec a reference point parsed tag spec, may be null.
     * @param tagSpec a parsed tag spec, may be null.
     * @param validationResult a ValidationResult.
     */
    private void checkForReferencePointCollision(
            final ParsedTagSpec refPointSpec, final ParsedTagSpec tagSpec,
            @Nonnull final ValidatorProtos.ValidationResult.Builder validationResult) {
        if (refPointSpec == null || !refPointSpec.hasReferencePoints()) {
            return;
        }

        if (tagSpec == null || !tagSpec.hasReferencePoints()) {
            return;
        }

        final List<String> params = new ArrayList<>();
        params.add(TagSpecUtils.getTagSpecName(tagSpec.getSpec()));
        params.add(refPointSpec.getReferencePoints().parentTagSpecName());
        context.addError(
                ValidatorProtos.ValidationError.Code.TAG_REFERENCE_POINT_CONFLICT,
                context.getLineCol(),
                params,
                refPointSpec.getReferencePoints().parentSpecUrl(),
                validationResult);
    }

    /**
     * Sets the status to ValidatorProtos.ValidationResult.Status.FAIL if the errors list contains
     * at least one entry of severity ValidatorProtos.ValidationError.Severity.ERROR, and to
     * ValidatorProtos.ValidationResult.Status.PASS otherwise.
     */
    private void setValidationResultStatus() {
        final Optional<ValidatorProtos.ValidationError> firstError =
                validationResult.getErrorsList().stream()
                        .filter(r -> r.getSeverity() == ValidatorProtos.ValidationError.Severity.ERROR)
                        .findFirst();
        validationResult.setStatus(firstError.isPresent()
                ? ValidatorProtos.ValidationResult.Status.FAIL
                : ValidatorProtos.ValidationResult.Status.PASS);
    }

    /**
     * Callback for cdata. When the text is the child of a JSON script tag outside of a
     * TEMPLATE ancestor, it must parse as JSON, otherwise an INVALID_JSON_CDATA warning is
     * added. The text is then handed to the cdata matcher of the current tag, if there is one.
     *
     * @param text the character data to validate.
     * @throws TagValidationException tag validation exception.
     * @throws CssValidationException css validation exception.
     * @throws IOException IO exception.
     */
    public void cdata(@Nonnull final String text) throws TagValidationException, CssValidationException, IOException {
        // Validate that JSON can be parsed.
        if (!this.context.getTagStack().hasAncestor("TEMPLATE")
          && this.context.getTagStack().isScriptTypeJsonChild()) {
            try {
                OBJECT_MAPPER.readTree(text);
            } catch (JsonProcessingException e) {
                final List<String> params = new ArrayList<>();
                this.context.addWarning(
                        ValidatorProtos.ValidationError.Code.INVALID_JSON_CDATA,
                        this.context.getLineCol(),
                        params, "",
                        this.validationResult);
            }
        }
        final CdataMatcher matcher = this.context.getTagStack().cdataMatcher();
        if (matcher != null) {
            matcher.match(text, this.context, this.validationResult);
        }
    }

    /**
     * Processing character data inside an element. The characters are buffered until the
     * element ends; characters reported while no tag is being tracked are dropped.
     *
     * @param ch The characters.
     * @param start The start position in the character array.
     * @param length The number of characters to use from the character array.
     */
    @Override
    public void characters(final char[] ch, final int start, final int length) {
        if (this.encounteredTag == null) {
            return;
        }
        if (charactersBuilder == null) {
            this.charactersBuilder = new StringBuilder();
        }
        // Append straight from the parser's buffer instead of building a temporary String.
        charactersBuilder.append(ch, start, length);
    }

    /**
     * AMPValidatorManager object.
     */
    @Nonnull
    private final AMPValidatorManager validatorManager;

    /**
     * ExitCondition object.
     */
    @Nonnull
    private final ExitCondition exitCondition;

    /**
     * Max nodes; values less than or equal to zero disable the limit.
     */
    private final int maxNodesAllowed;

    /**
     * Total nodes, counted as the number of start tags processed so far.
     */
    private int totalNodes;

    /**
     * ValidationResult object.
     */
    @Nonnull
    private final ValidatorProtos.ValidationResult.Builder validationResult;

    /**
     * HtmlFormat used to validate against.
     */
    @Nonnull
    private final ValidatorProtos.HtmlFormat.Code htmlFormat;

    /**
     * Context object capturing session variables of the current validation.
     */
    @Nonnull
    private final Context context;

    /**
     * Encountered tag; null when no tag is currently being tracked.
     */
    private ParsedHtmlTag encounteredTag;

    /**
     * Characters can be called multiple times per tag, so the pieces are collected here.
     */
    private StringBuilder charactersBuilder;

    /**
     * Jackson ObjectMapper instance.
     */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy