All downloads are free. Search and download functionality uses the official Maven repository.

com.soulgalore.velocity.RemoveInvalidXMLChars Maven / Gradle / Ivy

package com.soulgalore.velocity;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Scanner;

/**
 * Command-line utility that strips characters which are not allowed in an
 * XML 1.0 document from a text file.
 * 
 * @author peter
 * 
 */
public class RemoveInvalidXMLChars {

	/**
	 * Reads the input file, removes every character that is not valid in XML
	 * 1.0 and writes the result to the output file.
	 * 
	 * The first argument is the XML input file, the second is the output file.
	 * A usage message is printed when the number of arguments is wrong, and an
	 * error message is printed when the input file does not exist.
	 * 
	 * @param args
	 *            {@code input.xml output.xml}
	 * @throws FileNotFoundException
	 *             if the output file cannot be opened for writing
	 */
	public static void main(String[] args) throws FileNotFoundException {

		if (args.length != 2) {
			System.out
					.println("Wrong number of arguments: input.xml output.xml");
			return;
		}

		// First check that the file exists.
		final File input = new File(args[0]);
		if (!input.exists()) {
			System.err.println("The file " + args[0] + " doesn't exist");
			return;
		}

		final String output = stripNonValidXMLCharacters(readTextFile(input));

		// NOTE: reading and writing both use the platform default charset.
		final PrintWriter out = new PrintWriter(new FileOutputStream(args[1]));
		try {
			out.write(output);
		} finally {
			// Always release the file handle, even if the write fails.
			out.close();
		}
		// PrintWriter never throws on I/O errors; it only records them.
		if (out.checkError()) {
			System.err.println("Failed to write the file " + args[1]);
		}
	}

	/**
	 * Returns a copy of the input that contains only characters allowed by
	 * the {@code Char} production of the XML 1.0 standard: {@code #x9},
	 * {@code #xA}, {@code #xD}, {@code #x20-#xD7FF}, {@code #xE000-#xFFFD}
	 * and {@code #x10000-#x10FFFF}.
	 * 
	 * The input is processed code point by code point, so supplementary
	 * characters encoded as a valid surrogate pair are kept, while unpaired
	 * surrogates are removed. This method returns an empty String if the
	 * input is null or empty.
	 * 
	 * @param in
	 *            The String whose non-valid characters we want to remove.
	 * @return The in String, stripped of non-valid characters.
	 */
	public static String stripNonValidXMLCharacters(String in) {
		if (in == null || in.length() == 0) {
			return ""; // vacancy test.
		}

		final StringBuilder out = new StringBuilder(in.length());
		int i = 0;
		while (i < in.length()) {
			// For an unpaired surrogate codePointAt returns the surrogate
			// itself, which is outside every valid range and gets dropped.
			final int codePoint = in.codePointAt(i);
			if (isValidXMLCodePoint(codePoint)) {
				out.appendCodePoint(codePoint);
			}
			// Advance by two chars for a surrogate pair, otherwise by one.
			i += Character.charCount(codePoint);
		}
		return out.toString();
	}

	/**
	 * Tells whether the code point is matched by the XML 1.0 {@code Char}
	 * production.
	 */
	private static boolean isValidXMLCodePoint(int codePoint) {
		return (codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
				|| ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
				|| ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
				|| ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF));
	}

	/**
	 * Reads a text file line by line into a String, using the platform
	 * default charset.
	 * 
	 * Every line read is terminated with a single {@code "\n"} in the result,
	 * whatever line terminator the file used. I/O errors are reported on
	 * standard error and are not propagated: the content read so far (an
	 * empty String if the file could not be opened) is returned.
	 * 
	 * @param file
	 *            The file to read.
	 * @return The content of the file, or what could be read of it.
	 */
	public static String readTextFile(File file) {
		final StringBuilder content = new StringBuilder();
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new FileReader(file));
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line).append('\n');
			}
		} catch (IOException e) {
			System.err.println("Could not read the file " + file + ": "
					+ e.getMessage());
		} finally {
			// reader is still null when the file could not be opened; closing
			// the BufferedReader also closes the wrapped FileReader.
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					System.err.println("Could not close the file " + file
							+ ": " + e.getMessage());
				}
			}
		}
		return content.toString();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy