All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.main.java.com.dd.plist.ASCIIPropertyListParser Maven / Gradle / Ivy

Go to download

This library enables Java applications to work with property lists in various formats. Supported formats for reading and writing are OS X/iOS binary and XML property lists. ASCII property lists are also supported. The library also provides access to basic functions of NeXTSTEP/Cocoa classes like NSDictionary, NSArray, etc.

There is a newer version: 1.28
Show newest version
/*
 * plist - An open source library to parse and generate property lists
 * Copyright (C) 2014 Daniel Dreibrodt
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.dd.plist;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.ParseException;
import java.text.StringCharacterIterator;
import java.util.LinkedList;
import java.util.List;

/**
 * 

* Parser for ASCII property lists. Supports Apple OS X/iOS and GnuStep/NeXTSTEP format. * This parser is based on the recursive descent paradigm, but the underlying grammar * is not explicitely defined. *

*

* Resources on ASCII property list format: *

* * @author Daniel Dreibrodt */ public class ASCIIPropertyListParser { /** * Parses an ASCII property list file. * * @param f The ASCII property list file. * @return The root object of the property list. This is usually a NSDictionary but can also be a NSArray. * @throws java.text.ParseException When an error occurs during parsing. * @throws java.io.IOException When an error occured while reading from the input stream. */ public static NSObject parse(File f) throws IOException, ParseException { return parse(new FileInputStream(f)); } /** * Parses an ASCII property list from an input stream. * * @param in The input stream that points to the property list's data. * @return The root object of the property list. This is usually a NSDictionary but can also be a NSArray. * @throws java.text.ParseException When an error occurs during parsing. * @throws java.io.IOException When an error occured while reading from the input stream. */ public static NSObject parse(InputStream in) throws ParseException, IOException { byte[] buf = PropertyListParser.readAll(in); in.close(); return parse(buf); } /** * Parses an ASCII property list from a byte array. * * @param bytes The ASCII property list data. * @return The root object of the property list. This is usually a NSDictionary but can also be a NSArray. * @throws ParseException When an error occurs during parsing. */ public static NSObject parse(byte[] bytes) throws ParseException { ASCIIPropertyListParser parser = new ASCIIPropertyListParser(bytes); return parser.parse(); } public static final char WHITESPACE_SPACE = ' '; public static final char WHITESPACE_TAB = '\t'; public static final char WHITESPACE_NEWLINE = '\n'; public static final char WHITESPACE_CARRIAGE_RETURN = '\r'; public static final char ARRAY_BEGIN_TOKEN = '('; public static final char ARRAY_END_TOKEN = ')'; public static final char ARRAY_ITEM_DELIMITER_TOKEN = ','; public static final char DICTIONARY_BEGIN_TOKEN = '{'; public static final char DICTIONARY_END_TOKEN = '}'; public static final char DICTIONARY_ASSIGN_TOKEN = '='; public static final char DICTIONARY_ITEM_DELIMITER_TOKEN = ';'; public static final char QUOTEDSTRING_BEGIN_TOKEN = '"'; public static final char QUOTEDSTRING_END_TOKEN = '"'; public static final char QUOTEDSTRING_ESCAPE_TOKEN = '\\'; public static final char DATA_BEGIN_TOKEN = '<'; public static final char DATA_END_TOKEN = '>'; public static final char DATA_GSOBJECT_BEGIN_TOKEN = '*'; public static final char DATA_GSDATE_BEGIN_TOKEN = 'D'; public static final char DATA_GSBOOL_BEGIN_TOKEN = 'B'; public static final char DATA_GSBOOL_TRUE_TOKEN = 'Y'; public static final char DATA_GSBOOL_FALSE_TOKEN = 'N'; public static final char DATA_GSINT_BEGIN_TOKEN = 'I'; public static final char DATA_GSREAL_BEGIN_TOKEN = 'R'; public static final char DATE_DATE_FIELD_DELIMITER = '-'; public static final char DATE_TIME_FIELD_DELIMITER = ':'; public static final char DATE_GS_DATE_TIME_DELIMITER = ' '; public static final char DATE_APPLE_DATE_TIME_DELIMITER = 'T'; public static final char DATE_APPLE_END_TOKEN = 'Z'; public static final char COMMENT_BEGIN_TOKEN = '/'; public static final char MULTILINE_COMMENT_SECOND_TOKEN = '*'; public static final char SINGLELINE_COMMENT_SECOND_TOKEN = '/'; public static final char MULTILINE_COMMENT_END_TOKEN = '/'; /** * Property list source data */ private byte[] data; /** * Current parsing index */ private int index; /** * Only allow subclasses to change instantiation. */ protected ASCIIPropertyListParser() { } /** * Creates a new parser for the given property list content. * * @param propertyListContent The content of the property list that is to be parsed. */ private ASCIIPropertyListParser(byte[] propertyListContent) { data = propertyListContent; } /** * Checks whether the given sequence of symbols can be accepted. * * @param sequence The sequence of tokens to look for. * @return Whether the given tokens occur at the current parsing position. */ private boolean acceptSequence(char... sequence) { for (int i = 0; i < sequence.length; i++) { if (data[index + i] != sequence[i]) return false; } return true; } /** * Checks whether the given symbols can be accepted, that is, if one * of the given symbols is found at the current parsing position. * * @param acceptableSymbols The symbols to check. * @return Whether one of the symbols can be accepted or not. */ private boolean accept(char... acceptableSymbols) { boolean symbolPresent = false; for (char c : acceptableSymbols) { if (data[index] == c) symbolPresent = true; } return symbolPresent; } /** * Checks whether the given symbol can be accepted, that is, if * the given symbols is found at the current parsing position. * * @param acceptableSymbol The symbol to check. * @return Whether the symbol can be accepted or not. */ private boolean accept(char acceptableSymbol) { return data[index] == acceptableSymbol; } /** * Expects the input to have one of the given symbols at the current parsing position. * * @param expectedSymbols The expected symbols. * @throws ParseException If none of the expected symbols could be found. */ private void expect(char... expectedSymbols) throws ParseException { if (!accept(expectedSymbols)) { String excString = "Expected '" + expectedSymbols[0] + "'"; for (int i = 1; i < expectedSymbols.length; i++) { excString += " or '" + expectedSymbols[i] + "'"; } excString += " but found '" + (char) data[index] + "'"; throw new ParseException(excString, index); } } /** * Expects the input to have the given symbol at the current parsing position. * * @param expectedSymbol The expected symbol. * @throws ParseException If the expected symbol could be found. */ private void expect(char expectedSymbol) throws ParseException { if (!accept(expectedSymbol)) throw new ParseException("Expected '" + expectedSymbol + "' but found '" + (char) data[index] + "'", index); } /** * Reads an expected symbol. * * @param symbol The symbol to read. * @throws ParseException If the expected symbol could not be read. */ private void read(char symbol) throws ParseException { expect(symbol); index++; } /** * Skips the current symbol. */ private void skip() { index++; } /** * Skips several symbols * * @param numSymbols The amount of symbols to skip. */ private void skip(int numSymbols) { index += numSymbols; } /** * Skips all whitespaces and comments from the current parsing position onward. */ private void skipWhitespacesAndComments() { boolean commentSkipped; do { commentSkipped = false; //Skip whitespaces while (accept(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE, WHITESPACE_SPACE, WHITESPACE_TAB)) { skip(); } //Skip single line comments "//..." if (acceptSequence(COMMENT_BEGIN_TOKEN, SINGLELINE_COMMENT_SECOND_TOKEN)) { skip(2); readInputUntil(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE); commentSkipped = true; } //Skip multi line comments "/* ... */" else if (acceptSequence(COMMENT_BEGIN_TOKEN, MULTILINE_COMMENT_SECOND_TOKEN)) { skip(2); while (true) { if (acceptSequence(MULTILINE_COMMENT_SECOND_TOKEN, MULTILINE_COMMENT_END_TOKEN)) { skip(2); break; } skip(); } commentSkipped = true; } } while (commentSkipped); //if a comment was skipped more whitespace or another comment can follow, so skip again } /** * Reads input until one of the given symbols is found. * * @param symbols The symbols that can occur after the string to read. * @return The input until one the given symbols. */ private String readInputUntil(char... symbols) { String s = ""; while (!accept(symbols)) { s += (char) data[index]; skip(); } return s; } /** * Reads input until the given symbol is found. * * @param symbol The symbol that can occur after the string to read. * @return The input until the given symbol. */ private String readInputUntil(char symbol) { String s = ""; while (!accept(symbol)) { s += (char) data[index]; skip(); } return s; } /** * Parses the property list from the beginning and returns the root object * of the property list. * * @return The root object of the property list. This can either be a NSDictionary or a NSArray. * @throws ParseException When an error occured during parsing */ public NSObject parse() throws ParseException { index = 0; //Skip Unicode byte order mark (BOM) if(data.length >= 3 && (data[0] & 0xFF) == 0xEF && (data[1] & 0xFF) == 0xBB && (data[2] & 0xFF) == 0xBF) skip(3); skipWhitespacesAndComments(); expect(DICTIONARY_BEGIN_TOKEN, ARRAY_BEGIN_TOKEN, COMMENT_BEGIN_TOKEN); try { return parseObject(); } catch (ArrayIndexOutOfBoundsException ex) { throw new ParseException("Reached end of input unexpectedly.", index); } } /** * Parses the NSObject found at the current position in the property list * data stream. * * @return The parsed NSObject. * @see ASCIIPropertyListParser#index */ private NSObject parseObject() throws ParseException { switch (data[index]) { case ARRAY_BEGIN_TOKEN: { return parseArray(); } case DICTIONARY_BEGIN_TOKEN: { return parseDictionary(); } case DATA_BEGIN_TOKEN: { return parseData(); } case QUOTEDSTRING_BEGIN_TOKEN: { String quotedString = parseQuotedString(); //apple dates are quoted strings of length 20 and after the 4 year digits a dash is found if (quotedString.length() == 20 && quotedString.charAt(4) == DATE_DATE_FIELD_DELIMITER) { try { return new NSDate(quotedString); } catch (Exception ex) { //not a date? --> return string return new NSString(quotedString); } } else { return new NSString(quotedString); } } default: { //0-9 if (data[index] > 0x2F && data[index] < 0x3A) { //could be a date or just a string return parseDateString(); } else { //non-numerical -> string or boolean String parsedString = parseString(); return new NSString(parsedString); } } } } /** * Parses an array from the current parsing position. * The prerequisite for calling this method is, that an array begin token has been read. * * @return The array found at the parsing position. */ private NSArray parseArray() throws ParseException { //Skip begin token skip(); skipWhitespacesAndComments(); List objects = new LinkedList(); while (!accept(ARRAY_END_TOKEN)) { objects.add(parseObject()); skipWhitespacesAndComments(); if (accept(ARRAY_ITEM_DELIMITER_TOKEN)) { skip(); } else { break; //must have reached end of array } skipWhitespacesAndComments(); } //parse end token read(ARRAY_END_TOKEN); return new NSArray(objects.toArray(new NSObject[objects.size()])); } /** * Parses a dictionary from the current parsing position. * The prerequisite for calling this method is, that a dictionary begin token has been read. * * @return The dictionary found at the parsing position. */ private NSDictionary parseDictionary() throws ParseException { //Skip begin token skip(); skipWhitespacesAndComments(); NSDictionary dict = new NSDictionary(); while (!accept(DICTIONARY_END_TOKEN)) { //Parse key String keyString; if (accept(QUOTEDSTRING_BEGIN_TOKEN)) { keyString = parseQuotedString(); } else { keyString = parseString(); } skipWhitespacesAndComments(); //Parse assign token read(DICTIONARY_ASSIGN_TOKEN); skipWhitespacesAndComments(); NSObject object = parseObject(); dict.put(keyString, object); skipWhitespacesAndComments(); read(DICTIONARY_ITEM_DELIMITER_TOKEN); skipWhitespacesAndComments(); } //skip end token skip(); return dict; } /** * Parses a data object from the current parsing position. * This can either be a NSData object or a GnuStep NSNumber or NSDate. * The prerequisite for calling this method is, that a data begin token has been read. * * @return The data object found at the parsing position. */ private NSObject parseData() throws ParseException { NSObject obj = null; //Skip begin token skip(); if (accept(DATA_GSOBJECT_BEGIN_TOKEN)) { skip(); expect(DATA_GSBOOL_BEGIN_TOKEN, DATA_GSDATE_BEGIN_TOKEN, DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN); if (accept(DATA_GSBOOL_BEGIN_TOKEN)) { //Boolean skip(); expect(DATA_GSBOOL_TRUE_TOKEN, DATA_GSBOOL_FALSE_TOKEN); if (accept(DATA_GSBOOL_TRUE_TOKEN)) { obj = new NSNumber(true); } else { obj = new NSNumber(false); } //Skip the parsed boolean token skip(); } else if (accept(DATA_GSDATE_BEGIN_TOKEN)) { //Date skip(); String dateString = readInputUntil(DATA_END_TOKEN); obj = new NSDate(dateString); } else if (accept(DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN)) { //Number skip(); String numberString = readInputUntil(DATA_END_TOKEN); obj = new NSNumber(numberString); } //parse data end token read(DATA_END_TOKEN); } else { String dataString = readInputUntil(DATA_END_TOKEN); dataString = dataString.replaceAll("\\s+", ""); int numBytes = dataString.length() / 2; byte[] bytes = new byte[numBytes]; for (int i = 0; i < bytes.length; i++) { String byteString = dataString.substring(i * 2, i * 2 + 2); int byteValue = Integer.parseInt(byteString, 16); bytes[i] = (byte) byteValue; } obj = new NSData(bytes); //skip end token skip(); } return obj; } /** * Attempts to parse a plain string as a date if possible. * * @return A NSDate if the string represents such an object. Otherwise a NSString is returned. */ private NSObject parseDateString() { String numericalString = parseString(); if (numericalString.length() > 4 && numericalString.charAt(4) == DATE_DATE_FIELD_DELIMITER) { try { return new NSDate(numericalString); } catch(Exception ex) { //An exception occurs if the string is not a date but just a string } } return new NSString(numericalString); } /** * Parses a plain string from the current parsing position. * The string is made up of all characters to the next whitespace, delimiter token or assignment token. * * @return The string found at the current parsing position. */ private String parseString() { return readInputUntil(WHITESPACE_SPACE, WHITESPACE_TAB, WHITESPACE_NEWLINE, WHITESPACE_CARRIAGE_RETURN, ARRAY_ITEM_DELIMITER_TOKEN, DICTIONARY_ITEM_DELIMITER_TOKEN, DICTIONARY_ASSIGN_TOKEN, ARRAY_END_TOKEN); } /** * Parses a quoted string from the current parsing position. * The prerequisite for calling this method is, that a quoted string begin token has been read. * * @return The quoted string found at the parsing method with all special characters unescaped. * @throws ParseException If an error occured during parsing. */ private String parseQuotedString() throws ParseException { //Skip begin token skip(); String quotedString = ""; boolean unescapedBackslash = true; //Read from opening quotation marks to closing quotation marks and skip escaped quotation marks while (data[index] != QUOTEDSTRING_END_TOKEN || (data[index - 1] == QUOTEDSTRING_ESCAPE_TOKEN && unescapedBackslash)) { quotedString += (char) data[index]; if (accept(QUOTEDSTRING_ESCAPE_TOKEN)) { unescapedBackslash = !(data[index - 1] == QUOTEDSTRING_ESCAPE_TOKEN && unescapedBackslash); } skip(); } String unescapedString; try { unescapedString = parseQuotedString(quotedString); } catch (Exception ex) { throw new ParseException("The quoted string could not be parsed.", index); } //skip end token skip(); return unescapedString; } /** * Used to encode the parsed strings */ private static CharsetEncoder asciiEncoder; /** * Parses a string according to the format specified for ASCII property lists. * Such strings can contain escape sequences which are unescaped in this method. * * @param s The escaped string according to the ASCII property list format, without leading and trailing quotation marks. * @return The unescaped string in UTF-8 or ASCII format, depending on the contained characters. * @throws java.io.UnsupportedEncodingException If the en-/decoder for the UTF-8 or ASCII encoding could not be loaded * @throws java.nio.charset.CharacterCodingException If the string is encoded neither in ASCII nor in UTF-8 */ public static synchronized String parseQuotedString(String s) throws UnsupportedEncodingException, CharacterCodingException { List strBytes = new LinkedList(); StringCharacterIterator iterator = new StringCharacterIterator(s); char c = iterator.current(); while (iterator.getIndex() < iterator.getEndIndex()) { switch (c) { case '\\': { //An escaped sequence is following byte[] bts = parseEscapedSequence(iterator).getBytes("UTF-8"); for (byte b : bts) strBytes.add(b); break; } default: { //a normal ASCII char strBytes.add((byte) 0); strBytes.add((byte) c); break; } } c = iterator.next(); } byte[] bytArr = new byte[strBytes.size()]; int i = 0; for (Byte b : strBytes) { bytArr[i] = b.byteValue(); i++; } //Build string String result = new String(bytArr, "UTF-8"); CharBuffer charBuf = CharBuffer.wrap(result); //If the string can be represented in the ASCII codepage // --> use ASCII encoding if (asciiEncoder == null) asciiEncoder = Charset.forName("ASCII").newEncoder(); if (asciiEncoder.canEncode(charBuf)) return asciiEncoder.encode(charBuf).asCharBuffer().toString(); //The string contains characters outside the ASCII codepage // --> use the UTF-8 encoded string return result; } /** * Unescapes an escaped character sequence, e.g. \\u00FC. * * @param iterator The string character iterator pointing to the first character after the backslash * @return The unescaped character as a string. * @throws UnsupportedEncodingException If an invalid Unicode or ASCII escape sequence is found. */ private static String parseEscapedSequence(StringCharacterIterator iterator) throws UnsupportedEncodingException { char c = iterator.next(); if (c == '\\') { return new String(new byte[]{0, '\\'}, "UTF-8"); } else if (c == '"') { return new String(new byte[]{0, '\"'}, "UTF-8"); } else if (c == 'b') { return new String(new byte[]{0, '\b'}, "UTF-8"); } else if (c == 'n') { return new String(new byte[]{0, '\n'}, "UTF-8"); } else if (c == 'r') { return new String(new byte[]{0, '\r'}, "UTF-8"); } else if (c == 't') { return new String(new byte[]{0, '\t'}, "UTF-8"); } else if (c == 'U' || c == 'u') { //4 digit hex Unicode value String byte1 = ""; byte1 += iterator.next(); byte1 += iterator.next(); String byte2 = ""; byte2 += iterator.next(); byte2 += iterator.next(); byte[] stringBytes = {(byte) Integer.parseInt(byte1, 16), (byte) Integer.parseInt(byte2, 16)}; return new String(stringBytes, "UTF-8"); } else { //3 digit octal ASCII value String num = ""; num += c; num += iterator.next(); num += iterator.next(); int asciiCode = Integer.parseInt(num, 8); byte[] stringBytes = {0, (byte) asciiCode}; return new String(stringBytes, "UTF-8"); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy