com.soulgalore.velocity.RemoveInvalidXMLChars Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xml-velocity Show documentation
Show all versions of xml-velocity Show documentation
Merge an XML file with a Velocity template
The newest version!
package com.soulgalore.velocity;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Scanner;
/**
 * Strips characters that are not allowed by the XML 1.0 standard from a text file.
 *
 * <p>Can be run from the command line (see {@link #main(String[])}) or used as a library through
 * {@link #stripNonValidXMLCharacters(String)} and {@link #readTextFile(File)}.
 *
 * @author peter
 */
public class RemoveInvalidXMLChars {

  /**
   * Reads the input file, removes every character that is not valid in XML 1.0 and writes the
   * result to the output file.
   *
   * <p>The first argument is the XML input file, the second is the output file. If the number of
   * arguments is wrong, or the input file does not exist, a message is printed and nothing is
   * written.
   *
   * @param args {@code [input.xml, output.xml]}
   * @throws FileNotFoundException if the output file cannot be opened for writing
   */
  public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 2) {
      System.out.println("Wrong number of arguments: input.xml output.xml");
      return;
    }

    // First check that the input file exists.
    final File input = new File(args[0]);
    if (!input.exists()) {
      System.err.println("The file " + args[0] + " doesn't exist");
      return;
    }

    final String cleaned = stripNonValidXMLCharacters(readTextFile(input));

    final PrintWriter out = new PrintWriter(new FileOutputStream(args[1]));
    try {
      out.write(cleaned);
      // PrintWriter never throws on I/O failure; checkError() flushes and reports it instead.
      if (out.checkError()) {
        System.err.println("Failed to write the file " + args[1]);
      }
    } finally {
      // Always release the file handle, even if writing failed unexpectedly.
      out.close();
    }
  }

  /**
   * Returns a copy of the input that contains only characters allowed by the XML 1.0 standard
   * ({@code Char} production): tab, line feed, carriage return, 0x20-0xD7FF, 0xE000-0xFFFD and
   * 0x10000-0x10FFFF.
   *
   * <p>The input is examined code point by code point, so a supplementary character encoded as a
   * valid surrogate pair is kept intact, while an unpaired surrogate is removed.
   *
   * @param in The String whose non-valid characters we want to remove.
   * @return The in String, stripped of non-valid characters; an empty String if the input is null
   *     or empty.
   */
  public static String stripNonValidXMLCharacters(String in) {
    if (in == null || in.length() == 0) {
      return ""; // vacancy test.
    }

    final int length = in.length();
    final StringBuilder out = new StringBuilder(length);
    int index = 0;
    while (index < length) {
      // codePointAt combines a high/low surrogate pair into one code point; a lone surrogate is
      // returned as-is (0xD800-0xDFFF) and is then rejected by the validity check below.
      final int codePoint = in.codePointAt(index);
      if (isValidXmlCodePoint(codePoint)) {
        out.appendCodePoint(codePoint);
      }
      index += Character.charCount(codePoint);
    }
    return out.toString();
  }

  /** Tells whether the code point is in one of the ranges XML 1.0 allows for a character. */
  private static boolean isValidXmlCodePoint(int codePoint) {
    return codePoint == 0x9
        || codePoint == 0xA
        || codePoint == 0xD
        || (codePoint >= 0x20 && codePoint <= 0xD7FF)
        || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
        || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
  }

  /**
   * Reads a text file line by line and returns its content with every line terminated by a single
   * {@code '\n'} (so the original line terminators are normalized and a trailing newline is
   * always present for non-empty files).
   *
   * <p>The file is decoded by {@link FileReader}, i.e. with its default character encoding. This
   * is a best-effort read: an {@link IOException} is printed to standard error and whatever was
   * read up to that point (possibly the empty String) is returned.
   *
   * @param file the file to read
   * @return the content of the file, or the part that could be read before an I/O error
   */
  public static String readTextFile(File file) {
    final StringBuilder content = new StringBuilder();
    BufferedReader reader = null;
    try {
      reader = new BufferedReader(new FileReader(file));
      String line;
      while ((line = reader.readLine()) != null) {
        content.append(line).append('\n');
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // The reader stays null when the file could not be opened; closing it also closes the
      // underlying FileReader, so no separate close is needed.
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
    return content.toString();
  }
}