All downloads are free. The search and download functionality uses the official Maven repository.

com.javanut.pronghorn.util.parse.JSONStreamParser Maven / Gradle / Ivy

Go to download

Ring buffer based queuing utility for applications that require high performance and/or a small footprint. Well suited for embedded and stream based processing.

There is a newer version: 1.1.27
Show newest version
package com.javanut.pronghorn.util.parse;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.javanut.pronghorn.util.Appendables;
import com.javanut.pronghorn.util.TrieKeyable;
import com.javanut.pronghorn.util.TrieParser;
import com.javanut.pronghorn.util.TrieParserReader;

public class JSONStreamParser {	
	
	private static final Logger logger = LoggerFactory.getLogger(JSONStreamParser.class);

	private static final int WHITE_SPACE = 1;
	private static final int WHITE_SPACE_4 = WHITE_SPACE | ((int)0x0D)<<8;
	private static final int WHITE_SPACE_3 = WHITE_SPACE | ((int)0x0A)<<8;
	private static final int WHITE_SPACE_2 = WHITE_SPACE | ((int)0x09)<<8;
	private static final int WHITE_SPACE_1 = WHITE_SPACE | ((int)0x20)<<8;
	private static final int STRING_PART = 3;
	private static final int STRING_PART_75 = STRING_PART | 0x7500;
	private static final int STRING_PART_74 = STRING_PART | 0x0900;
	private static final int STRING_PART_72 = STRING_PART | 0x0D00;
	private static final int STRING_PART_6E = STRING_PART | 0x0A00;
	private static final int STRING_PART_66 = STRING_PART | 0x0C00;
	private static final int STRING_PART_62 = STRING_PART | 0x0800;
	private static final int STRING_PART_2F = STRING_PART | 0x2F00;
	private static final int STRING_PART_5C = STRING_PART | 0x5C00;
	private static final int STRING_PART_22 = STRING_PART | 0x2200;
	private static final int STRING_END = 4;
	private static final int STRING_END_75 = STRING_END | 0x7500;
	private static final int STRING_END_74 = STRING_END | 0x0900;
	private static final int STRING_END_72 = STRING_END | 0x0D00;
	private static final int STRING_END_6E = STRING_END | 0x0A00;
	private static final int STRING_END_66 = STRING_END | 0x0C00;
	private static final int STRING_END_62 = STRING_END | 0x0800;
	private static final int STRING_END_2F = STRING_END | 0x2F00;
	private static final int STRING_END_5C = STRING_END | 0x5C00;
	private static final int STRING_END_22 = STRING_END  | 0x2200;
	private static final int CONTINUED_STRING = 5;
	private static final int NAME_SEPARATOR = 6;
	private static final int END_OBJECT = 7;
	private static final int VALUE_SEPARATOR = 8;
	private static final int BEGIN_ARRAY = 9;
	private static final int BEGIN_OBJECT = 10;
	private static final int END_ARRAY = 11;
	private static final int NUMBER_ID = 12;
	private static final int FALSE_ID = 13;
	private static final int NULL_ID = 14;
	private static final int TRUE_ID = 15;
	
	private static final byte DEFAULT_STATE = 0;
	private static final byte TEXT_STATE = 1;
	
	
	private static final TrieParser defaultParser = defaultParser();
	private static final TrieParser stringEndParser = stringEndParser();
	
	private final ByteConsumerCodePointConverter converter = new ByteConsumerCodePointConverter();
		
	
	public static  & TrieKeyable> TrieParser customParser(Class keys) {

		//2 because we need 2 shorts for the number
		TrieParser trie = new TrieParser(256,2,false,true);
		
		
		for (T key: keys.getEnumConstants()) {			
			int value = toValue(key.ordinal());
			assert(value>=0);
			
			trie.setUTF8Value("\"", key.getKey(), "\"", value);
						
		}
		populateWithJSONTokens(trie);
		
		return trie;
	}


	public static int toValue(int idx) {
		return idx<<8;
	}

	public static int fromValue(int idx) {
		return idx>>8;
	}

	
	public static void populateWithJSONTokens(TrieParser trie) {
		trie.setValue(JSONConstants.string222, STRING_END); //to captures quoted values
		
		//code for strings with escape sequences
		trie.setValue(JSONConstants.string221, STRING_PART);
		trie.setValue(JSONConstants.continuedString, CONTINUED_STRING);
		/////
		
		trie.setValue(JSONConstants.ws2, WHITE_SPACE_2);
		trie.setValue(JSONConstants.ws3, WHITE_SPACE_3);
		trie.setValue(JSONConstants.ws4, WHITE_SPACE_4);
		
		trie.setValue(JSONConstants.falseLiteral, FALSE_ID);
		trie.setValue(JSONConstants.nullLiteral, NULL_ID);
		trie.setValue(JSONConstants.trueLiteral, TRUE_ID);
				
		trie.setValue(JSONConstants.beginArray, BEGIN_ARRAY);
		trie.setValue(JSONConstants.endArray, END_ARRAY);			
		
		trie.setValue(JSONConstants.beginObject, BEGIN_OBJECT);
		trie.setValue(JSONConstants.endObject, END_OBJECT);
		trie.setValue(JSONConstants.ws1, WHITE_SPACE_1);
		
		trie.setValue(JSONConstants.number, NUMBER_ID);		
		
		trie.setValue(JSONConstants.valueSeparator, VALUE_SEPARATOR);
		trie.setValue(JSONConstants.nameSeparator, NAME_SEPARATOR);			


	}
	
	
	
	public static TrieParser defaultParser() {
			
	    	TrieParser trie = new TrieParser(256,1,false,true);

			populateWithJSONTokens(trie);
			
			return trie;
	}


	static TrieParser stringEndParser() {
		
    	TrieParser trie = new TrieParser(256,1,false,true);

		trie.setValue(JSONConstants.string5C1, STRING_PART_5C);
		trie.setValue(JSONConstants.string5C2, STRING_END_5C);
		
		trie.setValue(JSONConstants.string2F1, STRING_PART_2F);
		trie.setValue(JSONConstants.string2F2, STRING_END_2F);
		
		trie.setValue(JSONConstants.string621, STRING_PART_62);
		trie.setValue(JSONConstants.string622, STRING_END_62);
		
		trie.setValue(JSONConstants.string661, STRING_PART_66);
		trie.setValue(JSONConstants.string662, STRING_END_66);
		
		trie.setValue(JSONConstants.string6E1, STRING_PART_6E);
		trie.setValue(JSONConstants.string6E2, STRING_END_6E);
		
		trie.setValue(JSONConstants.string721, STRING_PART_72);
		trie.setValue(JSONConstants.string722, STRING_END_72);
		
		trie.setValue(JSONConstants.string741, STRING_PART_74);
		trie.setValue(JSONConstants.string742, STRING_END_74);
		
		trie.setValue(JSONConstants.string751, STRING_PART_75);
		trie.setValue(JSONConstants.string752, STRING_END_75);
				
		trie.setValue(JSONConstants.string221, STRING_PART_22);
		trie.setValue(JSONConstants.string222, STRING_END_22);
						
		return trie;
	}
	
    
	public static TrieParserReader newReader() {
		return new TrieParserReader();
	}


	public void parse(TrieParserReader reader, TrieParser customParser, JSONStreamVisitor visitor) {
		
		byte state = DEFAULT_STATE;
		
		//System.out.println("xxxx: "+customParser);
		
		while (visitor.isReady()) {
			if (DEFAULT_STATE == state) {
				
				//StringBuilder builder = new StringBuilder();
				//TrieParserReader.debugAsUTF8(reader, builder, 180);
				
				int pos = reader.sourcePos;
				
				//TrieParserReader.debugAsUTF8(reader, System.err);
				//System.err.println();
				
				final int id  = (int)TrieParserReader.parseNext(reader, customParser);
				
				//logger.info("start pos {} position is now {} vs ring buffer len {}", 
				//		pos, reader.sourcePos, reader.sourceLen);
				
				if (-1 == id) {
					assert(pos == reader.sourcePos) : "did not return to start position";
				}
				
				//logger.info("PRSE log event {}  ",id);
				
				
				//customParser.toDOT(System.out);
				
				switch (id) {
					case STRING_PART: //start of string change mode
						state = TEXT_STATE;
						visitor.stringBegin();
						TrieParserReader.capturedFieldBytes(reader, 0, visitor.stringAccumulator());
						break;
		            case CONTINUED_STRING: //continue string change mode
						//we have no string captured this is just a flag to change modes
		            	state = TEXT_STATE;	            	
						break;					
					case STRING_END: //full string
						visitor.stringBegin();
						TrieParserReader.capturedFieldBytes(reader, 0, visitor.stringAccumulator());
						visitor.stringEnd();
						break;
					case NAME_SEPARATOR:        // :
						visitor.nameSeparator();
						break;
					case BEGIN_OBJECT:	
						visitor.beginObject(); // {
						break;
					case END_OBJECT:
						visitor.endObject(); // }
						break;				
					case BEGIN_ARRAY:
						visitor.beginArray(); // [
						break;					
					case END_ARRAY:
						visitor.endArray();  // ]
						break;					
					case VALUE_SEPARATOR:    // ,
						visitor.valueSeparator();
						break;					
					case WHITE_SPACE_1:
					case WHITE_SPACE_2:
					case WHITE_SPACE_3:
					case WHITE_SPACE_4:						
						visitor.whiteSpace((byte)(id>>8));  // white space
						break;					
					case NUMBER_ID:
				   	    visitor.numberValue(TrieParserReader.capturedDecimalMField(reader, 0),TrieParserReader.capturedDecimalEField(reader, 0));
						break;					
					case FALSE_ID:
						visitor.literalFalse();
						break;					
					case NULL_ID:
						visitor.literalNull();
						break;					
					case TRUE_ID:
						visitor.literalTrue();
						break;
					case -1:
						//if less than longest known this is not an error we just need more data...
						//TODO:confirm grows.
						if (reader.parseHasContentLength(reader) > customParser.longestKnown()) {
							System.err.println("at position "+reader.sourcePos);
							System.err.print("Unable to parse: '");
							TrieParserReader.debugAsUTF8(reader, System.err,100,false);
							System.err.println("'");
							
							TrieParserReader.debugAsArray(reader, System.err, 80);
							System.err.println();
							if (reader.sourceLen>80) {
								System.err.println("warning we have "+reader.sourceLen+" total.");
							}
							
							throw new RuntimeException("check that JSON tags are expected.");
						}
						
						return;
					default:
					
						//the only values here are the ones matching the custom strings 	
						visitor.customString(fromValue(id));	
				}			
				
			} else {
				//text state;
				
				int id = (int)TrieParserReader.parseNext(reader, stringEndParser);
				
				//logger.info("log text {} ",id);
				
				if (id!=-1) {
					int type = 0xFF&id;
					int value = (fromValue(id));
					
					if (0x75!=value) {
					
						visitor.stringAccumulator().consume((byte)value);
						TrieParserReader.capturedFieldBytes(reader, 0, visitor.stringAccumulator());				
					
					} else {				
												
						// uXXXX 4HexDig conversion
						converter.setTarget(visitor.stringAccumulator());
						TrieParserReader.capturedFieldBytes(reader, 0, converter);
									
					}
					
					if (STRING_END == type) {
						state = DEFAULT_STATE;
						visitor.stringEnd();
					} 
				} else {
					//TrieParserReader.debugAsUTF8(reader, System.err);
					reader.moveBack(1);//we need the new call to see teh slash
					return;
				}
				
			}
			
		};
		
	}
	
    public void parse(TrieParserReader reader, JSONStreamVisitor visitor) {

		
		byte state = DEFAULT_STATE;
		
		while (visitor.isReady()) {
			if (DEFAULT_STATE == state) {
				
				int id  = (int)TrieParserReader.parseNext(reader, defaultParser);
				
				
				//logger.info("P2 log event {} remaining {}",id,reader.sourceLen);
				
				switch (id) {
					case STRING_PART: //start of string change mode
						state = TEXT_STATE;
						visitor.stringBegin();
						TrieParserReader.capturedFieldBytes(reader, 0, visitor.stringAccumulator());
						break;
		            case CONTINUED_STRING: //continue string change mode
						//we have no string captured this is just a flag to change modes
		            	state = TEXT_STATE;	            	
						break;					
					case STRING_END: //full string
						visitor.stringBegin();
						TrieParserReader.capturedFieldBytes(reader, 0, visitor.stringAccumulator());
						visitor.stringEnd();
						break;
					case NAME_SEPARATOR:        // :
						visitor.nameSeparator();
						break;
					case BEGIN_OBJECT:	
						visitor.beginObject(); // {
						break;
					case END_OBJECT:
						visitor.endObject(); // }
						break;				
					case BEGIN_ARRAY:
						visitor.beginArray(); // [
						break;					
					case END_ARRAY:
						visitor.endArray();  // ]
						break;					
					case VALUE_SEPARATOR:    // ,
						visitor.valueSeparator();
						break;					
					case WHITE_SPACE_1:
					case WHITE_SPACE_2:
					case WHITE_SPACE_3:
					case WHITE_SPACE_4:						
						visitor.whiteSpace((byte)(id>>8));  // white space
						break;					
					case NUMBER_ID:
					    visitor.numberValue(TrieParserReader.capturedDecimalMField(reader, 0),TrieParserReader.capturedDecimalEField(reader, 0));
						break;					
					case FALSE_ID:
						visitor.literalFalse();
						break;					
					case NULL_ID:
						visitor.literalNull();
						break;					
					case TRUE_ID:
						visitor.literalTrue();
						break;
					case -1:						
						//System.err.println("last text was unfound");
						//TrieParserReader.debugAsUTF8(reader, System.err);						
						return;
					default:
						System.out.println("unknown value: "+id);
				}			
				
			} else {
				//text state;
				
				//TODO: quote not found and not rolling back to other def
				//TODO: fast def must be run first not slow one!!!!
				
				int id = (int)TrieParserReader.parseNext(reader, stringEndParser);
				
			//	logger.info("log text {} ",id);
				
				if (id!=-1) {
					int type = 0xFF&id;
					int value = (fromValue(id));
					
					if (0x75 != value) {
						
						visitor.stringAccumulator().consume((byte)value);
						TrieParserReader.capturedFieldBytes(reader, 0, visitor.stringAccumulator());
						
					} else {			
						
						// uXXXX 4HexDig conversion
						converter.setTarget(visitor.stringAccumulator());
						TrieParserReader.capturedFieldBytes(reader, 0, converter);
						
					}
					
					if (STRING_END == type) {
						state = DEFAULT_STATE;
						visitor.stringEnd();
					} 
				} else {
					//TrieParserReader.debugAsUTF8(reader, System.err);
					reader.moveBack(1);//we need the new call to see teh slash
					//exit the parse because we have run out of data, will continue later
					return;  
				}
				
			}
		}
		
	}



}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy