All downloads are free. Search and download functionality uses the official Maven repository.

com.demandware.appsec.secure.manipulation.impl.XMLManipulator Maven / Gradle / Ivy

Go to download

Provides a set of context-based encoders and filterers in Java that allow application developers to sanitize application data for safe output or processing.

The newest version!
/*
 * Copyright 2015 Demandware Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
 * file except in compliance with the License. You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions and limitations under the
 * License.
 */
package com.demandware.appsec.secure.manipulation.impl;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import com.demandware.appsec.secure.manipulation.AbstractCharacterManipulator;
import com.demandware.appsec.secure.manipulation.IManipulateOption;

/**
 * XMLManipulator handles all content related to XML
 *
 * @author Chris Smith
 */
public class XMLManipulator
    extends AbstractCharacterManipulator
{

    static enum XMLManipulatorOption
        implements IManipulateOption
    {
        //@formatter:off
        
        //These characters are immune to modification
        CONTENT                 ( '-' ),
        SINGLE_QUOTE_ATTRIBUTE  ( '-', '"' ),
        DOUBLE_QUOTE_ATTRIBUTE  ( '-',      '\'' ),
        COMMENT_CONTENT         (      '"', '\'', '<', '!', '>', '#', '$',
                                  '%', '^', '*',  '+', '/', '=', '?', '@',
                                  '[', '\\',']',  '{', '|', '}', '~' ),
        ;

        //These characters are immune in all contexts
        private final Character[] baseImmune =
                                {
                                    ',', ';', ':', '.', '_', ' ', '(', ')',
                                    '\t', '\n', '\r'
                                };
        
        //@formatter:on

        private final Character[] immune;

        private XMLManipulatorOption( Character... immune )
        {
            this.immune = ManipulationUtils.combineArrays( immune, this.baseImmune );
        }

        public Character[] getImmuneCharacters()
        {
            return this.immune;
        }
    }

    private static final Map characterToEntityMap = createEntityMap();

    private static final String REPLACE_HEX = ""; // for control characters, use blank, from RFC

    // only used in JUnit
    static String getReplacementHex()
    {
        return REPLACE_HEX;
    }

    XMLManipulator( XMLManipulatorOption manipulatorOption )
    {
        super( manipulatorOption );
    }

    @Override
    protected String getCorrectCharacter( Character c )
    {
        String correctedCharacter = "";
        XMLManipulatorOption opt = (XMLManipulatorOption) this.manipulatorOption;

        // If the character is alphanumeric or is immune, it is OK
        if ( ManipulationUtils.isAlphaNum( c ) || ManipulationUtils.isInList( c, opt.getImmuneCharacters() ) )
        {
            correctedCharacter = String.valueOf( c );
        }
        else
        {
            // Check if the character can be written as an entity to block attacks
            String entity = characterToEntityMap.get( c );

            if ( entity != null )
            {
                correctedCharacter = entity;
            }
            // Otherwise, replace illegal control characters with a safe replacement
            // these characters can have special meaning and are recommended to be removed by the RFC
            else if ( ( c <= 0x1f ) || // lower bounds of control characters except tab and newlines
                ( c >= 0x7f && c <= 0x84 ) || // DEL through APC control characters,
                ( c >= 0x86 && c <= 0x9f ) || // (still allows NEL character)
                ( c >= 0xfdd0 && c <= 0xfddf ) ) // more control chars
            {
                correctedCharacter = REPLACE_HEX;
            }
            // Otherwise encode the character in hex
            else
            {
                correctedCharacter = "&#x" + ManipulationUtils.getHexForCharacter( c ) + ";";
            }
        }
        return correctedCharacter;
    }

    /**
     * Small unmodifiable map of entity mappings
     * 
     * @return
     */
    private static Map createEntityMap()
    {
        Map map = new HashMap( 4 );
        map.put( (char) 34, """ ); /* quotation mark */
        map.put( (char) 38, "&" ); /* ampersand */
        map.put( (char) 39, "'" ); /* single quote*/
        map.put( (char) 60, "<" ); /* less-than sign */
        map.put( (char) 62, ">" ); /* greater-than sign */
        return Collections.unmodifiableMap( map );
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy