All Downloads are FREE. Search and download functionalities are using the official Maven repository.

prerna.ds.util.flatfile.ParquetFileIterator Maven / Gradle / Ivy

The newest version!
package prerna.ds.util.flatfile;

import java.io.IOException;
import java.util.List;

import prerna.algorithm.api.SemossDataType;
import prerna.poi.main.helper.ParquetFileHelper;
import prerna.query.querystruct.ParquetQueryStruct;
import prerna.query.querystruct.selectors.IQuerySelector;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.util.ArrayUtilityMethods;

public class ParquetFileIterator extends AbstractFileIterator {

	private ParquetFileHelper helper;
	private ParquetQueryStruct qs;

	/**
	 * Simple iterator used when all the information can be parsed from the QS
	 * @param qs
	 */

	public ParquetFileIterator(ParquetQueryStruct qs) {
		this.qs = qs;
		this.fileLocation = qs.getFilePath();
		// set default values
		this.helper = new ParquetFileHelper();
		this.helper.setFileLocation(this.fileLocation);
		
		// dont think I need this
		//this.helper.parse(qs.getFilePath());

		this.dataTypeMap = qs.getColumnTypes();
		this.newHeaders = qs.getNewHeaderNames();

		// set the user defined headers
		if (this.newHeaders != null && !this.newHeaders.isEmpty()) {
			this.helper.modifyCleanedHeaders(this.newHeaders);
		}
		
		setSelectors(qs.getSelectors());
		
		// now that I have set the headers from the setSelectors
		this.headers = this.helper.getHeaders();
		this.additionalTypesMap = qs.getAdditionalTypes();

		if(this.dataTypeMap != null && !this.dataTypeMap.isEmpty()) {
			this.types = new SemossDataType[this.headers.length];
			this.additionalTypes = new String[this.headers.length];
			for(int index = 0; index < this.headers.length; index++) {
				this.types[index] = SemossDataType.convertStringToDataType(dataTypeMap.get(this.headers[index]));
				if(this.additionalTypesMap != null) {
					this.additionalTypes[index] = additionalTypesMap.get(this.headers[index]);
				}
			}
			this.helper.parseColumns(this.headers);
		} else {
			// shouldnt have any unkown types because we stop this at the beginning
			//setUnknownTypes();
			setSelectors(qs.getSelectors());
			qs.setColumnTypes(this.dataTypeMap);
		}

		//this.getNextRow(); // this will get the first row of the file
		// set limit and offset
		this.limit = qs.getLimit();
		this.offset = qs.getOffset();
	}
	
	private void setSelectors(List selectors) {
		if (selectors.isEmpty()) {
			// if no selectors, return everything
			String[] allHeaders = this.helper.getHeaders();
			for(int i = 0; i < allHeaders.length; i++) {
				QueryColumnSelector newSelector = new QueryColumnSelector("DND__" + allHeaders[i]);
				this.qs.addSelector(newSelector);
			}
			return; 
		}

		int numSelectors = selectors.size();
		String[] parquetSelectors = new String[numSelectors];
		for(int i = 0; i < numSelectors; i++) {
			QueryColumnSelector newSelector = (QueryColumnSelector) selectors.get(i);
			if(newSelector.getSelectorType() != IQuerySelector.SELECTOR_TYPE.COLUMN) {
				throw new IllegalArgumentException("Cannot perform math on a csv import");
			}
			parquetSelectors[i] = newSelector.getAlias();;
		}

		String[] allHeaders = this.helper.getHeaders();
		//String[] allHeaders = this.helper.getNewUniqueParquetHeaders().stream().toArray(String[]::new);

		
		if(allHeaders.length != parquetSelectors.length) {
			// order the selectors
			// all headers will be ordered
			String[] orderedSelectors = new String[parquetSelectors.length];
			int counter = 0;
			for(String header : allHeaders) {
				if(ArrayUtilityMethods.arrayContainsValue(parquetSelectors, header)) {
					orderedSelectors[counter] = header;
					counter++;
				}
			}

			this.helper.parseColumns(orderedSelectors);
			// after redoing the selectors, we need to skip the headers 
		}
	}

	@Override
	public void reset() throws Exception {
		// TODO Auto-generated method stub
	}
	
	@Override
	public void close() throws IOException {
		// TODO Auto-generated method stub
		
	}
	
	@Override
	public void getNextRow() {
		// TODO Auto-generated method stub
		
	}
	
	public ParquetQueryStruct getQs() {
		return this.qs;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy