All downloads are free. Search and download functionality uses the official Maven repository.

prompto.reader.CSVReader Maven / Gradle / Ivy

There is a newer version: 0.1.57
Show newest version
package prompto.reader;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;

import prompto.error.ReadWriteError;
import prompto.intrinsic.IResource;
import prompto.intrinsic.PromptoDict;
import prompto.intrinsic.PromptoDocument;
import prompto.intrinsic.PromptoList;

public abstract class CSVReader {
	
	/**
	 * Reads only the header row of the given CSV text.
	 *
	 * @param data the CSV text; when {@code null} no reader is created and an empty header list results
	 * @param separator the value separator, or {@code null} for the default {@code ','}
	 * @param encloser the quoting character, or {@code null} for the default {@code '"'}
	 * @return the header names, in file order
	 * @throws IOException declared for callers; the in-memory reader itself is closed on exit
	 */
	public static PromptoList<String> readHeaders(String data, Character separator, Character encloser) throws IOException {
		try(StringReader reader = data==null ? null : new StringReader(data)) {
			// the reader is typed StringReader so that the Reader overload of iterator(...) is selected
			CSVIterable iterable = iterator(reader, null, separator, encloser);
			// headers are built from StringBuilder.toString() values, hence the String element type
			ArrayList<String> headers = iterable.readHeaders();
			return new PromptoList<>(headers, false);
		}
	}

	public static PromptoList> read(String data, PromptoDict columns, Character separator, Character encloser) throws IOException {
		try(StringReader reader = data==null ? null : new StringReader(data)) {
			return read(reader, columns, separator, encloser);
		}
	}
	
	public static PromptoList> read(final Reader reader, PromptoDict columns, Character separator, Character encloser) {
		PromptoList> list = new PromptoList<>(false);
		Iterator> iter = iterator(reader, columns, separator, encloser);
		while(iter.hasNext())
			list.add(iter.next());
		return list;
	}
	
	static interface CSVIterable extends Iterable>, Iterator> {
		ArrayList readHeaders();
	}
	
	/**
	 * Builds a lazily-reading CSV iterable over in-memory text.
	 *
	 * @param data the CSV text; when {@code null} the iterable is created without a reader
	 * @param columns handed to the header parsing step; may be {@code null}
	 * @param separator the value separator, or {@code null} for the default {@code ','}
	 * @param encloser the quoting character, or {@code null} for the default {@code '"'}
	 * @return the iterable; nothing is read until it is used
	 * @throws IOException declared for callers
	 */
	public static CSVIterable iterator(String data, PromptoDict columns, Character separator, Character encloser) throws IOException {
		// keep the static type StringReader so the Reader overload is the one selected below
		StringReader source = null;
		if(data!=null)
			source = new StringReader(data);
		return iterator(source, columns, separator, encloser);
	}
	
	/**
	 * Builds a lazily-reading CSV iterable over a resource.
	 *
	 * The returned iterable reads from the resource's reader only when it is iterated, so the
	 * reader must stay open after this method returns. Wrapping it in try-with-resources here
	 * closed it before the first character could be read. The iterable closes the reader
	 * itself in its {@code finalize()}.
	 *
	 * @param resource the resource to read; when {@code null} the iterable is created without a reader
	 * @param columns handed to the header parsing step; may be {@code null}
	 * @param separator the value separator, or {@code null} for the default {@code ','}
	 * @param encloser the quoting character, or {@code null} for the default {@code '"'}
	 * @return the iterable, which now owns the reader
	 * @throws IOException if the resource cannot provide a reader
	 */
	public static CSVIterable iterator(IResource resource, PromptoDict columns, Character separator, Character encloser) throws IOException {
		// deliberately NOT try-with-resources: the iterable returned below still needs the reader
		var reader = resource==null ? null : resource.asReader();
		return iterator(reader, columns, separator, encloser);
	}

	public static CSVIterable iterator(final Reader _reader, PromptoDict columns, Character separator, Character encloser) {
		
		char sep = separator==null ? ',' : separator.charValue();
		char quote = encloser==null ? '"' : encloser.charValue();
		
		return new CSVIterable() {
			
			// character source; may be null, in which case every fetch reports end of input
			Reader reader = _reader;
			// header names, parsed on the first hasNext() call and kept afterwards
			ArrayList headers = null;
			// one-character lookahead filled by peekChar() and consumed by the next fetchChar()
			Integer peekedChar = null;
			// current character: 0 until the first fetch, -1 once the end of input is reached
			int nextChar = 0;
			
			/**
			 * Closes the underlying reader, if any, when this object is finalized.
			 * A failure to close is deliberately ignored.
			 */
			@Override
			public void finalize() {
				if(reader==null)
					return;
				try {
					reader.close();
				} catch(IOException ignored) {
					// best effort only: nothing useful can be done at this point
				}
			}
			
			@Override
			public Iterator> iterator() {
				return this;
			}
			
			/**
			 * Parses one line at the current position and returns it as the header names.
			 * No column dictionary is applied, and the result is not stored in {@code headers}.
			 */
			@Override
			public ArrayList readHeaders() {
				// nextChar is still 0 when nothing has been read yet: prime it, skipping leading newlines
				final boolean primed = nextChar!=0;
				if(!primed)
					fetchChar(true);
				ArrayList names = parseHeaders(null);
				return names;
			}

			/**
			 * Tells whether another data line is available, reading the first character and
			 * the header line on first use.
			 */
			@Override
			public boolean hasNext() {
				// first call: nothing fetched yet, so load the first character (leading newlines skipped)
				final boolean untouched = nextChar==0;
				if(untouched)
					fetchChar(true);
				// the header line is consumed once, before any data line
				if(headers==null)
					headers = parseHeaders(columns);
				// -1 marks the end of input; any positive value is a character to parse
				return nextChar>=1;
			}
			
			/** Advances to the next character without skipping line feeds. */
			private void fetchChar() {
				fetchChar(false);
			}
			
			/**
			 * Advances {@code nextChar} to the next significant character.
			 *
			 * A pending lookahead character is consumed first and returned unfiltered. Otherwise
			 * characters are read from the reader, always dropping carriage returns and, when
			 * requested, line feeds as well. Skipping is done in a loop rather than by recursion
			 * so that a long run of CR/LF characters cannot exhaust the call stack.
			 *
			 * @param eatNewLine whether line feeds read from the reader are skipped too
			 * @throws ReadWriteError if the underlying reader fails
			 */
			private void fetchChar(boolean eatNewLine) {
				if(reader==null) {
					nextChar = -1; // EOF
					return;
				}
				if(peekedChar!=null) {
					// hand out the character peekChar() already pulled from the reader
					nextChar = peekedChar.intValue();
					peekedChar = null;
					return;
				}
				try {
					int c;
					do {
						c = reader.read();
					} while(c=='\r' || (eatNewLine && c=='\n'));
					nextChar = c;
				} catch(IOException e) {
					throw new ReadWriteError(e.getMessage());
				}
			}
			
			/**
			 * Returns the character that follows {@code nextChar} without consuming it.
			 * The value is cached in {@code peekedChar} until the next fetch.
			 */
			private int peekChar() {
				if(peekedChar!=null)
					return peekedChar.intValue();
				// fetch one character ahead, then put the current one back in place
				final int current = nextChar;
				fetchChar();
				final int lookahead = nextChar;
				peekedChar = lookahead;
				nextChar = current;
				return lookahead;
			}



			// NOTE(review): this copy of the file is damaged at this point. The remainder of the
			// for-loop in parseHeaders, the end of that method and the declaration line of
			// parseLine were lost and are fused into a single line below. The code is left
			// untouched; restore the missing text from the upstream source.
			private ArrayList parseHeaders(PromptoDict columns) {
				// the header row is read like any other line
				ArrayList list = parseLine();
				if(columns!=null) {
					// NOTE(review): the loop body is missing; presumably it uses `columns` to
					// adjust the header names by index — confirm against upstream
					for(int i=0;i parseLine() {
				ArrayList list = new ArrayList<>();
				// each parseValue call consumes one value and reports whether the line continues
				while(parseValue(list))
					;
				// step over the line feed so that the next call starts on the following line
				if(nextChar=='\n')
					fetchChar();
				return list;
			}

			/**
			 * Parses the value starting at the current character, choosing the empty, quoted
			 * or unquoted form from that character.
			 *
			 * @param list receives the parsed value
			 * @return {@code true} while the current line has more to parse, i.e. the parser is
			 *         neither at a line feed nor at the end of input
			 */
			private boolean parseValue(ArrayList list) {
				if(nextChar!=sep && nextChar!=quote)
					parseUnquotedValue(list);
				else if(nextChar==sep)
					// a separator right away means there is nothing between two separators
					parseEmptyValue(list);
				else
					parseQuotedValue(list);
				final boolean lineFinished = nextChar==-1 || nextChar=='\n';
				return !lineFinished;
			}

			/**
			 * Records an absent value as {@code null} and steps over the separator.
			 *
			 * @param values receives the {@code null} placeholder
			 */
			private void parseEmptyValue(ArrayList values) {
				values.add(null);
				// consume the separator; line feeds are not skipped here
				fetchChar(false);
			}

			/**
			 * Parses a value wrapped in the enclosing character, then discards anything up to
			 * and including the following separator.
			 *
			 * @param list receives the parsed value
			 */
			private void parseQuotedValue(ArrayList list) {
				// step past the opening encloser
				fetchChar();
				parseValue(quote, list);
				// ignore trailing characters until a separator, a line feed or the end of input
				while(!(nextChar==sep || nextChar==-1 || nextChar=='\n'))
					fetchChar();
				// a separator belongs to this value and is consumed; a line feed or EOF is left in place
				if(nextChar==sep)
					fetchChar();
			}

			/** Parses a bare value, which ends at the next separator. */
			private void parseUnquotedValue(ArrayList list) {
				parseValue(sep, list);
			}
			
			/**
			 * Accumulates characters into one value until a handler reports that the value is
			 * complete, then stores the value in {@code list}.
			 *
			 * A non-empty value is stored as a string. An empty quoted value (for example
			 * {@code ""}) is stored as {@code null}, the same placeholder parseEmptyValue uses.
			 * Previously nothing was stored for it, which shifted every later value of the
			 * line one column to the left. An empty unquoted value still stores nothing, so
			 * empty lines and empty input keep producing an empty list.
			 *
			 * @param endChar the character that terminates the value: the encloser for quoted
			 *        values, the separator for unquoted ones
			 * @param list receives the parsed value
			 */
			private void parseValue(char endChar, ArrayList list) {
				StringBuilder sb = new StringBuilder();
				boolean exit = false;
				while(!exit) {
					if(nextChar==-1)
						exit = handleEOF(sb, endChar, list);
					else if(nextChar=='\n')
						exit = handleNewLine(sb, endChar, list);
					else if(nextChar==endChar)
						exit = handleEndChar(sb, endChar, list);
					else if(nextChar=='\\')
						exit = handleEscape(sb, endChar, list);
					else
						exit = handleOtherChar(sb, endChar, list);
				}
				if(sb.length()>0)
					list.add(sb.toString());
				else if(endChar==quote)
					// keep the column position of an explicitly quoted empty value
					list.add(null);
			}

			/**
			 * Appends an ordinary character to the value and moves on.
			 *
			 * @return always {@code false}: the value is not complete yet
			 */
			private boolean handleOtherChar(StringBuilder sb, char endChar, ArrayList list) {
				final char literal = (char)nextChar;
				sb.append(literal);
				fetchChar(false);
				return false;
			}

			/**
			 * Handles a backslash: the character after it is appended as is, and both are
			 * consumed. A backslash right before the end of input is simply dropped.
			 *
			 * @return always {@code false}: the value is not complete yet
			 */
			private boolean handleEscape(StringBuilder sb, char endChar, ArrayList list) {
				final int escaped = peekChar();
				if(escaped!=-1) {
					sb.append((char)escaped);
					// move onto the escaped character...
					fetchChar();
				}
				// ...and past it (or onto the end-of-input marker)
				fetchChar();
				return false;
			}

			/**
			 * Handles the end of input while reading a value.
			 *
			 * @return always {@code true}: the value ends here, nothing is consumed
			 */
			private boolean handleEOF(StringBuilder sb, char endChar, ArrayList list) {
				return true;
			}

			/**
			 * Handles the terminating character of the current value.
			 *
			 * Inside a quoted value, two enclosers in a row stand for one literal encloser.
			 * This check now uses the configured {@code quote} instead of a hard-coded
			 * {@code '"'}, so the doubling escape also works with a custom encloser.
			 *
			 * @return {@code false} when a doubled encloser was appended and parsing goes on,
			 *         {@code true} when the value is complete
			 */
			private boolean handleEndChar(StringBuilder sb, char endChar, ArrayList list) {
				if(endChar==quote && peekChar()==endChar) {
					sb.append((char)nextChar);
					// skip both enclosers of the escaped pair
					fetchChar();
					fetchChar();
					return false;
				} else {
					// consume the terminator itself
					fetchChar();
					return true;
				}
			}

			/**
			 * Handles a line feed met while reading a value.
			 *
			 * Inside a quoted value the line feed is part of the value. This check now uses the
			 * configured {@code quote} instead of a hard-coded {@code '"'}, so multi-line values
			 * also work with a custom encloser. In an unquoted value the line feed ends the
			 * value and is left in place for the line parser.
			 *
			 * @return {@code false} when the line feed was appended, {@code true} when the value is complete
			 */
			private boolean handleNewLine(StringBuilder sb, char endChar, ArrayList list) {
				if(endChar==quote) {
					sb.append((char)nextChar);
					fetchChar();
					return false;
				} else {
					return true;
				}
			}
			
			
			@Override
			public PromptoDocument next() {
				if(!hasNext())
					return null;
				ArrayList values = parseLine();
				PromptoDocument doc = new PromptoDocument<>();
				for(int i=0;i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy