All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.http.HtmlQuoting Maven / Gradle / Ivy

There is a newer version: 3.4.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.http;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

/**
 * This class is responsible for quoting HTML characters.
 *
 * <p>The five characters that are active in HTML markup are replaced by
 * entity references when quoting, and restored when unquoting:
 * <ul>
 *   <li>{@code &} becomes {@code &amp;}</li>
 *   <li>{@code <} becomes {@code &lt;}</li>
 *   <li>{@code >} becomes {@code &gt;}</li>
 *   <li>{@code '} becomes {@code &apos;}</li>
 *   <li>{@code "} becomes {@code &quot;}</li>
 * </ul>
 * All byte-oriented methods operate on UTF-8 encoded text; bytes other than
 * the five ASCII characters above are passed through unchanged.
 */
public class HtmlQuoting {
  // Entity references written in place of each active character. These must
  // be the escaped forms: using the raw characters here would make quoting a
  // no-op and would break unquoteHtmlChars, whose skip distances are the
  // lengths of these strings.
  private static final byte[] AMP_BYTES =
      "&amp;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] APOS_BYTES =
      "&apos;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] GT_BYTES =
      "&gt;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] LT_BYTES =
      "&lt;".getBytes(StandardCharsets.UTF_8);
  private static final byte[] QUOT_BYTES =
      "&quot;".getBytes(StandardCharsets.UTF_8);

  /**
   * Does the given string need to be quoted?
   * @param data the string to check
   * @param off the starting position
   * @param len the number of bytes to check
   * @return does the string contain any of the active html characters?
   */
  public static boolean needsQuoting(byte[] data, int off, int len) {
    for(int i=off; i< off+len; ++i) {
      switch(data[i]) {
      case '&':
      case '<':
      case '>':
      case '\'':
      case '"':
        return true;
      default:
        break;
      }
    }
    return false;
  }

  /**
   * Does the given string need to be quoted?
   * @param str the string to check; may be {@code null}
   * @return does the string contain any of the active html characters?
   *         Always {@code false} for {@code null}.
   */
  public static boolean needsQuoting(String str) {
    if (str == null) {
      return false;
    }
    byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
    return needsQuoting(bytes, 0 , bytes.length);
  }

  /**
   * Quote all of the active HTML characters in the given string as they
   * are added to the buffer.
   * @param output the stream to write the output to
   * @param buffer the byte array to take the characters from
   * @param off the index of the first byte to quote
   * @param len the number of bytes to quote
   * @throws IOException if writing to {@code output} fails
   */
  public static void quoteHtmlChars(OutputStream output, byte[] buffer,
                                    int off, int len) throws IOException {
    for(int i=off; i < off+len; i++) {
      switch (buffer[i]) {
      case '&':
        output.write(AMP_BYTES);
        break;
      case '<':
        output.write(LT_BYTES);
        break;
      case '>':
        output.write(GT_BYTES);
        break;
      case '\'':
        output.write(APOS_BYTES);
        break;
      case '"':
        output.write(QUOT_BYTES);
        break;
      default:
        // Not an active character: copy the byte through untouched.
        output.write(buffer, i, 1);
        break;
      }
    }
  }
  
  /**
   * Quote the given item to make it html-safe.
   * @param item the string to quote; may be {@code null}
   * @return the quoted string, the same {@code item} instance when nothing
   *         needed quoting, or {@code null} when {@code item} is {@code null}
   */
  public static String quoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    byte[] bytes = item.getBytes(StandardCharsets.UTF_8);
    if (!needsQuoting(bytes, 0, bytes.length)) {
      // Fast path: avoid allocating a buffer when there is nothing to escape.
      return item;
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try {
      quoteHtmlChars(buffer, bytes, 0, bytes.length);
      return buffer.toString("UTF-8");
    } catch (IOException ioe) {
      // Won't happen, since it is a bytearrayoutputstream
      return null;
    }
  }

  /**
   * Return an output stream that quotes all of the output.
   * Flushing or closing the returned stream flushes or closes {@code out}.
   * @param out the stream to write the quoted output to
   * @return a new stream that the application should write to
   * @throws IOException if the underlying output fails
   */
  public static OutputStream quoteOutputStream(final OutputStream out
                                               ) throws IOException {
    return new OutputStream() {
      // Scratch array so single-byte writes can reuse the array-based path.
      private final byte[] single = new byte[1];

      @Override
      public void write(byte[] data, int off, int len) throws IOException {
        quoteHtmlChars(out, data, off, len);
      }
      
      @Override
      public void write(int b) throws IOException {
        single[0] = (byte) b;
        quoteHtmlChars(out, single, 0, 1);
      }
      
      @Override
      public void flush() throws IOException {
        out.flush();
      }
      
      @Override
      public void close() throws IOException {
        out.close();
      }
    };
  }

  /**
   * Remove HTML quoting from a string.
   * Only the five entity references produced by
   * {@link #quoteHtmlChars(String)} are recognized.
   * @param item the string to unquote; may be {@code null}
   * @return the unquoted string, the same {@code item} instance when it
   *         contains no {@code &}, or {@code null} when {@code item} is
   *         {@code null}
   * @throws IllegalArgumentException if an {@code &} in {@code item} does not
   *         start one of the recognized entity references
   */
  public static String unquoteHtmlChars(String item) {
    if (item == null) {
      return null;
    }
    int next = item.indexOf('&');
    // nothing was quoted
    if (next == -1) {
      return item;
    }
    int len = item.length();
    int posn = 0;
    StringBuilder buffer = new StringBuilder();
    while (next != -1) {
      // Copy the literal text between the previous entity and this one.
      buffer.append(item, posn, next);
      // Each branch advances past the entity it matched; the increment is the
      // length of that entity string.
      if (item.startsWith("&amp;", next)) {
        buffer.append('&');
        next += 5;
      } else if (item.startsWith("&apos;", next)) {
        buffer.append('\'');
        next += 6;
      } else if (item.startsWith("&gt;", next)) {
        buffer.append('>');
        next += 4;
      } else if (item.startsWith("&lt;", next)) {
        buffer.append('<');
        next += 4;
      } else if (item.startsWith("&quot;", next)) {
        buffer.append('"');
        next += 6;
      } else {
        // Report the offending entity up to and including its ';', or the
        // rest of the string when it is unterminated.
        int end = item.indexOf(';', next)+1;
        if (end == 0) {
          end = len;
        }
        throw new IllegalArgumentException("Bad HTML quoting for " + 
                                           item.substring(next,end));
      }
      posn = next;
      next = item.indexOf('&', posn);
    }
    buffer.append(item, posn, len);
    return buffer.toString();
  }
  
  /**
   * Command-line demo: prints each argument, its quoted form, and the result
   * of unquoting that quoted form.
   * @param args the strings to round-trip
   * @throws Exception declared for convenience; not expected for these calls
   */
  public static void main(String[] args) throws Exception {
    for(String arg:args) {
      System.out.println("Original: " + arg);
      String quoted = quoteHtmlChars(arg);
      System.out.println("Quoted: "+ quoted);
      String unquoted = unquoteHtmlChars(quoted);
      System.out.println("Unquoted: " + unquoted);
      System.out.println();
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy