All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.soulgalore.velocity.RemoveInvalidXMLChars Maven / Gradle / Ivy

The newest version!
package com.soulgalore.velocity;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Scanner;

/**
 * Command-line utility and helper methods for stripping characters that are not allowed in an
 * XML 1.0 document.
 *
 * @author peter
 */
public class RemoveInvalidXMLChars {

  /**
   * Reads an XML file, removes every character that is not legal in XML 1.0 and writes the
   * result to another file.
   *
   * <p>The first argument is the XML input file, the second is the output file. Both files are
   * read and written with the platform default charset. If the number of arguments is wrong or
   * the input file does not exist, a message is printed and nothing is written.
   *
   * @param args {@code input.xml output.xml}
   * @throws FileNotFoundException if the output file cannot be opened for writing
   */
  public static void main(String[] args) throws FileNotFoundException {

    if (args.length != 2) {
      System.out.println("Wrong number of arguments: input.xml output.xml");
      return;
    }

    // First check that the input file exists.
    final File input = new File(args[0]);
    if (!input.exists()) {
      System.err.println("The file " + args[0] + " doesn't exist");
      return;
    }

    // Read and clean before opening the output, so a failure here never truncates the output file.
    final String output = stripNonValidXMLCharacters(readTextFile(input));

    final PrintWriter out = new PrintWriter(new FileOutputStream(args[1]));
    try {
      out.write(output);
      // PrintWriter never throws on write failures, it only records them; surface them here.
      if (out.checkError()) {
        System.err.println("Failed to write the file " + args[1]);
      }
    } finally {
      out.close();
    }
  }

  /**
   * Returns a copy of the input that contains only characters allowed by the XML 1.0
   * {@code Char} production: {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
   * [#x10000-#x10FFFF]}.
   *
   * <p>The string is examined one Unicode code point at a time, so supplementary characters
   * encoded as a valid surrogate pair are kept, while unpaired surrogates are removed.
   *
   * @param in The String whose non-valid characters we want to remove.
   * @return The in String, stripped of non-valid characters; an empty String if the input is
   *         null or empty.
   */
  public static String stripNonValidXMLCharacters(String in) {
    if (in == null || in.length() == 0) {
      return "";
    }

    final StringBuilder out = new StringBuilder(in.length());
    int i = 0;
    while (i < in.length()) {
      // codePointAt joins a valid surrogate pair into one code point; a lone surrogate is
      // returned as-is and is then rejected by the range check below.
      final int codePoint = in.codePointAt(i);
      if (isValidXMLCharacter(codePoint)) {
        out.appendCodePoint(codePoint);
      }
      i += Character.charCount(codePoint);
    }
    return out.toString();
  }

  /** Tells whether the code point matches the XML 1.0 {@code Char} production. */
  private static boolean isValidXMLCharacter(int codePoint) {
    return codePoint == 0x9 || codePoint == 0xA || codePoint == 0xD
        || (codePoint >= 0x20 && codePoint <= 0xD7FF)
        || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
        || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
  }

  /**
   * Reads a whole text file into a String using the platform default charset.
   *
   * <p>The file is read line by line and every line is terminated with {@code "\n"} in the
   * result, so the original line terminators are not preserved and the result of a non-empty
   * file always ends with a newline.
   *
   * <p>I/O errors (including a missing file) are not propagated: the stack trace is printed
   * and whatever was read before the error, possibly the empty String, is returned.
   *
   * @param file the file to read
   * @return the content of the file, one {@code "\n"} after each line
   */
  public static String readTextFile(File file) {
    final StringBuilder content = new StringBuilder();
    FileReader fileReader = null;
    BufferedReader reader = null;
    try {
      fileReader = new FileReader(file);
      reader = new BufferedReader(fileReader);
      String line;
      while ((line = reader.readLine()) != null) {
        content.append(line).append('\n');
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // Closing the BufferedReader also closes the wrapped FileReader; fall back to the
      // FileReader only if the wrapper was never created. Either may be null if opening failed.
      try {
        if (reader != null) {
          reader.close();
        } else if (fileReader != null) {
          fileReader.close();
        }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    return content.toString();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy