All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.appdapter.fileconv.FileStreamUtils Maven / Gradle / Ivy

There is a newer version: 1.2.4
Show newest version
/*
 *  Copyright 2012 by The Appdapter Project (www.appdapter.org).
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.appdapter.fileconv;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.appdapter.core.log.Debuggable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.util.FileManager;

/**
 * @author Stu B. 
 *
 * @author logicmoo
 */

public abstract class FileStreamUtils {

	static private Logger theLogger = LoggerFactory.getLogger(FileStreamUtils.class);

	public static boolean SheetURLDataReaderMayReturnNullOnError = true;

	public static Reader makeSheetURLDataReader(String fullUrlTxt) throws IOException {
		try {
			return new InputStreamReader((new URL(fullUrlTxt)).openStream());
		} catch (Throwable t) {
			theLogger.error("Cannot read[" + fullUrlTxt + "]", t);
			if (SheetURLDataReaderMayReturnNullOnError) {
				return null;
			}
			throw Debuggable.reThrowable(t, IOException.class);
		}
	}

	public static boolean canLoadWorkbooks() {
		String loc = "GluePuma_HRKR50_TestFull_OnDisk.xlsx";
		URL url = FileStreamUtils.class.getResource(loc);
		if (url != null) {
			loc = url.toString();
		}
		try {
			url = new URL(loc);
			theLogger.warn("canLoadWorkbook: " + loc + "=" + url);
			Workbook workbook = getWorkbook(url.openStream(), "xlsx");
			Sheet sheet = null;
			int max = workbook.getNumberOfSheets() - 1;
			for (int sheetNumber = 0; sheetNumber <= max; sheetNumber++) {

				sheet = workbook.getSheetAt(sheetNumber);
				String sn = sheet.getSheetName();
				theLogger.warn("canLoadWorkbook: " + sn + "=" + loc);
				Reader reader = sheetToReader(sheet);
				LineNumberReader lnr = new LineNumberReader(reader);
				while ((lnr.readLine()) != null) {
				}
			}
		} catch (InvalidFormatException e) {
			theLogger.error("canLoadWorkbook ", e);
		} catch (IOException e) {
			theLogger.error("canLoadWorkbook ", e);
		} catch (Throwable e) {
			theLogger.error("canLoadWorkbook ", e);
		}
		return false;
	}

	public static Workbook getWorkbook(InputStream is, String extHint) throws IOException, InvalidFormatException {
		if (is == null)
			throw new IOException("Not input stream for hint: " + extHint);
		try {
			return WorkbookFactory.create(is);
		} catch (Exception e0) {
			if (extHint == null) {
				extHint = "xlsx";
			} else {
				extHint = extHint.toLowerCase();
			}
			try {
				if (extHint.endsWith("xlsx"))
					return new XSSFWorkbook(OPCPackage.open(is));
				if (extHint.endsWith("xls"))
					return new HSSFWorkbook(is);
				// openoffice documents
				return new XSSFWorkbook(OPCPackage.open(is));
			} catch (Exception e2) {
				return new HSSFWorkbook(is);
			}
		}
	}

	abstract public InputStream openInputStream(String srcPath, java.util.List cls) throws IOException;

	abstract public InputStream openInputStreamOrNull(String srcPath, java.util.List cls);

	public Workbook getWorkbook(String sheetLocation, java.util.List fileModelCLs) throws InvalidFormatException, IOException {
		InputStream stream = openInputStreamOrNull(sheetLocation, fileModelCLs);
		if (stream == null)
			throw new IOException("Location not found: " + sheetLocation);
		return getWorkbook(stream, getFileExt(sheetLocation));
	}

	public Reader getSheetReaderAt(String sheetLocation, String sheetName, java.util.List fileModelCLs) {
		try {
			theLogger.info("getSheetReaderAt: " + sheetLocation + "!" + sheetName);
			return getWorkbookSheetCsvReaderAt(sheetLocation, sheetName, fileModelCLs);
		} catch (InvalidFormatException e) {
			theLogger.error("getWorkbookSheetCsvReaderAt ", e);
		} catch (IOException e) {
			theLogger.error("getWorkbookSheetCsvReaderAt ", e);
		}
		return null;
	}

	public static Model getModelIfAvailable(String sheetLocation, FileManager fm) {
		try {
			return fm.loadModel(sheetLocation);
		} catch (Exception e) {
			return null;
		}
	}

	public static boolean isNullOrEmptyString(CharSequence str) {
		return str == null || str.length() == 0;
	}

	public static String[] splitOfSubPath(String str) {
		String[] splt = str.split("#");
		if (splt.length == 2)
			return splt;
		return str.split("!");
	}

	public static String combineLocationAndSheet(String sheetLocation, String sheetName) {
		boolean missingSheetLocation = isNullOrEmptyString(sheetLocation);
		boolean missingSheetName = isNullOrEmptyString(sheetName);
		if (missingSheetLocation && missingSheetName) {
			return null;
		}
		if (missingSheetName)
			return sheetLocation;
		if (missingSheetLocation)
			return sheetName;
		String combined = sheetLocation + "!" + sheetName;
		return combined;
	}

	public Reader getWorkbookSheetCsvReaderAt(String sheetLocation, String sheetName, java.util.List fileModelCLs) throws InvalidFormatException, IOException {
		boolean missingSheetLocation = isNullOrEmptyString(sheetLocation);
		boolean missingSheetName = isNullOrEmptyString(sheetName);
		if (missingSheetLocation && missingSheetName) {
			return NotFound("NULL SheetReader Location");
		}
		String combined = combineLocationAndSheet(sheetLocation, sheetName);
		Workbook workbook = getWorkbook(sheetLocation, fileModelCLs);
		if (workbook == null) {
			InputStream is = openInputStreamOrNull(sheetName, fileModelCLs);
			if (is == null)
				is = openInputStreamOrNull(combined, fileModelCLs);
			if (is == null)
				return NotFound(combined);

			String ext = getFileExt(sheetName);
			if (ext != null && ext.endsWith("csv")) {
				return new InputStreamReader(is);
			}
			workbook = getWorkbook(is, ext);
			if (workbook == null)
				return NotFound(sheetLocation + sheetName);
			return sheetToReader(workbook.getSheetAt(0));
		}
		// i know this is dead code but it might not be soon
		if (workbook == null)
			return NotFound(sheetLocation + sheetName);
		int sheetNumber = workbook.getSheetIndex(sheetName);
		Sheet sheet = null;
		if (sheetNumber >= 0)
			sheet = workbook.getSheetAt(sheetNumber);
		// use the workbook API
		if (sheet != null)
			return sheetToReader(sheet);
		Sheet sheet2 = null;
		int max = workbook.getNumberOfSheets() - 1;
		String sheetNameS = matchableName(sheetName);
		for (sheetNumber = 0; sheetNumber <= max; sheetNumber++) {
			sheet = workbook.getSheetAt(sheetNumber);
			String sn = matchableName(sheet.getSheetName());
			// found it by name
			if (sheetNameS.equals(sn))
				return sheetToReader(sheet);
			// cases like "Nspc.csv"
			if (sheetNameS.startsWith(sn))
				sheet2 = sheet;
		}
		if (sheet2 != null)
			return sheetToReader(sheet2);
		return NotFound(sheetLocation + sheetName);
	}

	public static String matchableName(String sheetName) {
		if (sheetName == null)
			return "";
		return (sheetName + " ").replace(".csv ", "").replace(".xlsx ", "").replaceAll("-", "").replaceAll(" ", "").toLowerCase();
	}

	private static Reader NotFound(String string) throws IOException {
		throw new FileNotFoundException(string);
	}

	public static String getFileExt(String srcPath) {
		int at = srcPath.lastIndexOf('.');
		if (at < 0)
			return null;
		return srcPath.substring(at + 1).toLowerCase();
	}

	public static Reader sheetToReader(Sheet sheet) {
		String str = sheetToString(sheet);
		String sn = sheet.getSheetName();
		getLogger().debug("Reading Sheet " + sn + " " + str.length() + " bytes");
		saveFileString(sn, str);
		return new StringReader(str);
	}

	public static void saveFileString(String sn) {
		URL url;
		try {
			url = new URL(sn);
			InputStream is = url.openStream();
			BufferedInputStream bis = new BufferedInputStream(is);
			ByteArrayOutputStream buf = new ByteArrayOutputStream();
			int result = bis.read();
			while (result != -1) {
				byte b = (byte) result;
				buf.write(b);
				result = bis.read();
			}

			saveFileString(sn.replaceAll(":", "-").replaceAll("/", "-").replaceAll(".", "-").replaceAll("?", "-").replaceAll("=", "-").replaceAll("--", "-"), buf.toString());
		} catch (Exception e) {
			e.printStackTrace();
		}

	}

	@SuppressWarnings("unused") private static void saveFileString(String sn, String str) {
		if (true)
			return;
		try {
			FileWriter fw = new FileWriter(new File(matchableName(sn) + ".csv"));
			fw.write(str);
			fw.close();
		} catch (IOException e) {
			getLogger().error(" Saving Sheet " + sn + " { \n " + str + "\n }", e);
			e.printStackTrace();
		}
	}

	protected static Logger getLogger() {
		return theLogger;
	}

	public static String sheetToString(Sheet sheet) {
		StringBuffer sheetBuff = new StringBuffer();
		if (sheet.getPhysicalNumberOfRows() == 0) {
			throw new RuntimeException("No rows on sheet: " + sheet);
		}
		int width = getSheetWidth(sheet);
		int maxInclusive = sheet.getLastRowNum();
		for (int i = sheet.getFirstRowNum(); i <= maxInclusive; i++) {
			Row row = sheet.getRow(i);
			if (row == null)
				continue;
			int rwInclusve = row.getLastCellNum();
			String str = getRowString(row, width);
			StringBuffer strBuff = new StringBuffer(str);
			// pad the rest
			int pad = width - rwInclusve;
			for (int j = 0; j < pad; j++) {
				strBuff.append(",");
			}
			sheetBuff.append(strBuff.toString().trim() + "\n");
		}
		return sheetBuff.toString();
	}

	private static String getRowString(Row row, int width) {
		return getRowString(row, width, false);
	}

	private static String getRowDebugString(Row row, int width) {
		return getRowString(row, width, true);
	}

	private static String getRowString(Row row, int width, boolean debugString) {
		int rwInclusve = row.getLastCellNum();
		if (rwInclusve > width)
			rwInclusve = width;
		StringBuffer strBuff = new StringBuffer();
		if (debugString) {
			strBuff.append("##;; " + row.getSheet().getSheetName() + " rownum= " + row.getRowNum() + "\n\n");
		}

		for (int j = 0; j <= rwInclusve; j++) {
			Cell cell = row.getCell(j);
			if (j > 0)
				strBuff.append(",");

			if (cell == null)
				continue;
			String t, c;
			String s = null;

			switch (cell.getCellType()) {
			case Cell.CELL_TYPE_BLANK:
				continue;
			case Cell.CELL_TYPE_STRING: {
				c = cell.getStringCellValue();
				strBuff.append(escapeCSV(c));
				continue;
			}

			case Cell.CELL_TYPE_NUMERIC: {
				t = "CELL_TYPE_NUMERIC";
				c = ("" + cell.getNumericCellValue() + " ").replace(".0 ", "").trim();
				break;
			}
			case Cell.CELL_TYPE_FORMULA: {
				t = "CELL_TYPE_FORMULA";
				try {
					c = cell.getCellFormula();
				} catch (org.apache.poi.ss.formula.FormulaParseException e) {
					if (Debuggable.isRelease())
						theLogger.warn("" + e);
					else {
						theLogger.error("" + e, e);
					}
					cell.setCellType(Cell.CELL_TYPE_STRING);
					c = cell.getStringCellValue();
				}
				break;
			}
			case Cell.CELL_TYPE_BOOLEAN: {
				t = "CELL_TYPE_BOOLEAN";
				c = "" + cell.getBooleanCellValue();
				break;
			}
			case Cell.CELL_TYPE_ERROR: {
				t = "CELL_TYPE_ERROR";
				c = "" + cell.getErrorCellValue();
				break;
			}
			default: {
				t = "CELL_TYPE_" + cell.getCellType();
				c = cell.getStringCellValue();
				break;
			}
			}
			cell.setCellType(Cell.CELL_TYPE_STRING);
			s = cell.getStringCellValue();
			if (s != null && s.length() > 1) {
				strBuff.append(escapeCSV(s));
				continue;
			}

			if (s == null || s.length() < 1) {
				if (!debugString) {
					String msg = Debuggable.toInfoStringArgV(t + " really? " + c, "cell=" + cell, "cellAsString=" + s, //
							"row.getClass= " + row.getClass(), "sheet=" + cell.getSheet().getSheetName(), //
							"row=" + cell.getRow(), "rowstr = " + getRowDebugString(row, width));
					if (!Debuggable.isRelease())
						Debuggable.doBreak(msg);
				}
			}
			c = s;
			strBuff.append(escapeCSV(c));
			continue;
		}
		return strBuff.toString();
	}

	private static int getSheetWidth(Sheet sheet) {
		// if (true) return 0;
		Row row = sheet.getRow(sheet.getFirstRowNum());
		int hadStuff = -1;
		for (int i = 0; i < row.getLastCellNum(); i++) {
			Cell c = row.getCell(i);
			if (c == null)
				continue;
			if (c.getCellType() == Cell.CELL_TYPE_STRING)
				hadStuff = i;
		}
		return hadStuff;
	}

	private static Object escapeCSV(Object cellValue) {
		if (cellValue == null)
			return "";
		String cellValueStr = cellValue.toString();
		return escapeEmbeddedCharacters(cellValueStr);
	}

	private static String escapeEmbeddedCharacters(String field) {
		if (field == null)
			return "";
		field = field.replace("\r\n", "\n").replace("\r", "\n").replace("\n", " ").trim();
		if (field.length() == 0)
			return field;

		// If the fields contents should be formatted to conform with Excel's
		// convention....

		// Firstly, check if there are any speech marks (") in the field;
		// each occurrence must be escaped with another set of speech marks
		// and then the entire field should be enclosed within another
		// set of speech marks. Thus, "Yes" he said would become
		// """Yes"" he said"
		if (field.contains("\"") || field.contains("\n") || field.contains(",")) {
			return "\"" + field.replaceAll("\"", "\"\"") + "\"";
		}
		return field;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy