All Downloads are FREE. Search and download functionalities are using the official Maven repository.

umcg.genetica.io.text.TextFile Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package umcg.genetica.io.text;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.*;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import umcg.genetica.containers.Pair;
import umcg.genetica.containers.Triple;
import umcg.genetica.text.Strings;

/**
 *
 * @author harmjan
 */
public class TextFile implements Iterable {
// check 1,2,3

	public static final int DEFAULT_BUFFER_SIZE = 4096;
	public static final Pattern tab = Pattern.compile("\\t");//Using the \t from string.tab is technically not valid. I would not want to depend on this
	public static final Pattern space = Strings.space;
	public static final Pattern colon = Strings.colon;
	public static final Pattern semicolon = Strings.semicolon;
	public static final Pattern comma = Strings.comma;
	protected BufferedReader in;
	protected File file;
	public static final boolean W = true;
	public static final boolean R = false;
	protected BufferedWriter out;
	protected boolean writeable;
	protected static final String ENCODING = "ISO-8859-1";
	private boolean gzipped;
	private int buffersize;

	public TextFile(String file, boolean mode) throws IOException {
		this(new File(file), mode, DEFAULT_BUFFER_SIZE);
	}

	public TextFile(File file, boolean mode) throws IOException {
		this(file, mode, DEFAULT_BUFFER_SIZE);
	}

	public TextFile(File file, boolean mode, int buffersize) throws IOException {
		
		this.buffersize = buffersize;
		this.file = file;
		
		String loc = file.getAbsolutePath();
		if (loc.trim().length() == 0) {
			throw new IOException("Could not find file: no file specified");
		}
		this.writeable = mode;

		if (loc.endsWith(".gz")) {
			gzipped = true;
		}
		open();
	}
	
	public TextFile(String file, boolean mode, int buffersize) throws IOException {
		this(new File(file), mode, buffersize);
	}

	public final void open() throws IOException {

		if (!file.exists() && !writeable) {
			throw new IOException("Could not find file: " + file);
		} else {
			if (writeable) {
				if (gzipped) {
					GZIPOutputStream gzipOutputStream = new GZIPOutputStream(new FileOutputStream(file));
					out = new BufferedWriter(new OutputStreamWriter(gzipOutputStream), buffersize);
				} else {
					out = new BufferedWriter(new FileWriter(file), buffersize);
				}
			} else {
				if (gzipped) {
					GZIPInputStream gzipInputStream = new GZIPInputStream(new FileInputStream(file));
					in = new BufferedReader(new InputStreamReader(gzipInputStream, "US-ASCII"));
				} else {
//                System.out.println("Opening file: "+file);
					in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING), 8096);
				}
			}
		}
	}

	public String readLine() throws IOException {
		return in.readLine();
	}

	public void write(String line) throws IOException {
		out.write(line);
	}

	public void close() throws IOException {
//        System.out.println("Closing "+file);
		if (writeable) {
			out.close();
		} else {
			in.close();
		}
	}

	/**
	 * This method is a wrapper for readLineElemsReturnReference: this method
	 * returns default substrings delimited by Pattern p.
	 *
	 * @param p The Pattern object to split with (e.g. TextFile.tab or
	 * Strings.comma)
	 * @return New String objects for each substring delimited by Pattern p
	 * @throws IOException
	 */
	public String[] readLineElems(Pattern p) throws IOException {
		return readLineElemsReturnReference(p);
	}

	private Iterator readLineElemsIterator(Pattern p) throws IOException {
		return new TextFileIteratorElements(this, p);
	}

	public Iterable readLineElemsIterable(Pattern p) {
		return new TextFileIterableElements(this, p);
	}
	
	/**
	 * This method returns default substrings delimited by Pattern p. As such,
	 * this method may be more memory-efficient in some situations (for example
	 * when only a multiple columns should be loaded and stored in memory).
	 *
	 * @param p The Pattern object to split with (e.g. TextFile.tab or
	 * Strings.comma)
	 * @return New String objects for each substring delimited by Pattern p
	 * @throws IOException
	 */
	public String[] readLineElemsReturnReference(Pattern p) throws IOException {
		if (in != null) {
			String ln = readLine();
			if (ln != null) {
				String[] elems = p.split(ln);
				ln = null;
				return elems;
			} else {
				return null;
			}
		} else {
			return null;
		}
	}

	/**
	 * This method returns a new object for each of the splitted elements,
	 * instead of the default action, in which each substring is backed by the
	 * original full-length String. As such, this method may be more
	 * memory-efficient in some situations (for example when only a single
	 * column should be loaded and stored in memory).
	 *
	 * @param p The Pattern object to split with (e.g. TextFile.tab or
	 * Strings.comma)
	 * @return New String objects for each substring delimited by Pattern p
	 * @throws IOException
	 */
	public String[] readLineElemsReturnObjects(Pattern p) throws IOException {
		if (in != null) {
			String ln = readLine();
			if (ln != null) {
				String[] origelems = p.split(ln);
				String[] returnelems = new String[origelems.length];
				for (int i = 0; i < origelems.length; i++) {
					returnelems[i] = new String(origelems[i]);
				}
				ln = null;
				return returnelems;
			} else {
				return null;
			}
		} else {
			return null;
		}


	}

	public int countLines() throws IOException {
		String ln = readLine();
		int ct = 0;
		while (ln != null) {
			if (ln.trim().length() > 0) {
				ct++;
			}
			ln = readLine();
		}
		close();
		open();
		return ct;
	}

	public int countCols(Pattern p) throws IOException {
		String ln = readLine();
		int ct = 0;
		if (ln != null) {
			String[] elems = p.split(ln);
			ct = elems.length;
		}
		close();
		open();
		return ct;
	}

	public String[] readAsArray() throws IOException {
		int numLines = countLines();
		String ln = readLine();
		if (ln == null) {
			return new String[0];
		}
		String[] data = new String[numLines];
		int i = 0;
		while (ln != null) {
			if (ln.trim().length() > 0) {
				data[i] = ln;
				i++;
			}
			ln = in.readLine();
		}
		return data;
	}

	public String[] readAsArray(int col, Pattern p) throws IOException {
		int numLines = countLines();
		String[] data = new String[numLines];
		int i = 0;
		String[] elems = readLineElems(p);
		while (elems != null) {
			if (elems.length > col) {
				data[i] = elems[col];
			}
			i++;
			elems = readLineElems(p);
		}
		return data;
	}

	public ArrayList readAsArrayList() throws IOException {
		ArrayList data = new ArrayList();
        
        String ln = readLine();
		while (ln != null) {
			if (ln.trim().length() > 0) {
				data.add(ln);
			}
			ln = in.readLine();
		}
		return data;
	}

	public ArrayList readAsArrayList(int col, Pattern p) throws IOException {
		ArrayList data = new ArrayList();
        
		String[] elems = readLineElems(p);
		while (elems != null) {
			if (elems.length > col) {
				data.add(elems[col]);
			}
			elems = readLineElems(p);
		}
		return data;
	}

	public void writeln(CharSequence csq) throws IOException {
		out.append(csq);
		out.append('\n');
	}

	public void writeln(String line) throws IOException {
		out.append(line);
		out.append('\n');
	}

	public void writeln() throws IOException {
		out.newLine();
	}

	public void append(char c) throws IOException {
		out.append(c);
	}

	public void append(CharSequence csq) throws IOException {
		out.append(csq);
	}

	public synchronized void writelnsynced(String str) throws IOException {
		this.writeln(str);
	}

	public void writelnTabDelimited(Object[] vals) throws IOException {
		String delim = "";
		for (Object val : vals) {
			out.write(delim);
			out.write(val.toString());
			delim = "\t";
		}
		writeln();
	}

	public void writelnDelimited(Object[] vals, Pattern p) throws IOException {
		String delim = "";
		for (Object val : vals) {
			out.write(delim);
			out.write(val.toString());
			delim = p.pattern();
		}
		writeln();
	}

	public Map readAsHashMap(int col1, int col2) throws IOException {
		Map output = new HashMap();
		String[] elems = readLineElems(tab);
		while (elems != null) {
			if (elems.length > col1 && elems.length > col2) {
				output.put(elems[col1], elems[col2]);
			}
			elems = readLineElems(tab);
		}
		return output;
	}

	public Map readAsHashMap(int col1, int col2, Pattern p) throws IOException {
		Map output = new HashMap();
		String[] elems = readLineElems(p);
		while (elems != null) {
			if (elems.length > col1 && elems.length > col2) {
				output.put(elems[col1], elems[col2]);
			}
			elems = readLineElems(p);
		}
		return output;
	}

	public Set readAsSet(int col, Pattern p) throws IOException {
		Set output = new HashSet();
		String[] elems = readLineElems(p);
		while (elems != null) {
			if (elems.length > col) {
				output.add(elems[col]);
			}
			elems = readLineElems(p);
		}
		return output;
	}

	public void writeList(List l) throws IOException {
		for (Object e : l) {
			this.writeln(e.toString());
		}
	}

	public String getFileName() {
		return file.getAbsolutePath();
	}

	public HashSet> readAsPairs(int A, int B) throws IOException {

		HashSet> output = new HashSet>();
		String[] elems = this.readLineElemsReturnObjects(tab);

		while (elems != null) {
			if (elems.length > A && elems.length > B) {
				output.add(new Pair(elems[A], elems[B]));
			}
			elems = this.readLineElemsReturnObjects(tab);
		}
		return output;
	}

	public void flush() throws IOException {
		out.flush();
	}

	public HashSet> readAsTriple(int A, int B, int C) throws IOException {
		HashSet> output = new HashSet>();
		String[] elems = this.readLineElemsReturnObjects(tab);

		while (elems != null) {
			if (elems.length > A && elems.length > B && elems.length > C) {
				output.add(new Triple(elems[A], elems[B], elems[C]));
			}
			elems = this.readLineElemsReturnObjects(tab);
		}
		return output;
	}

	@Override
	public Iterator iterator() {
		try {
			return new TextFileIterator(this);
		} catch (IOException ex) {
			throw new RuntimeException(ex);
		}
	}

	private static class TextFileIterator implements Iterator {

		private final TextFile textFile;
		String next;

		public TextFileIterator(TextFile textFile) throws IOException {
			this.textFile = textFile;
			next = textFile.readLine();
		}

		@Override
		public boolean hasNext() {
			return next != null;
		}

		@Override
		public String next() {
			String current = next;
			try {
				next = textFile.readLine();
			} catch (IOException ex) {
				throw new RuntimeException(ex);
			}
			return current;
		}

		@Override
		public void remove() {
			throw new UnsupportedOperationException("Not supported yet.");
		}
	}

	private static class TextFileIteratorElements implements Iterator {

		private final TextFile textFile;
		private final Pattern pattern;
		String[] next;

		public TextFileIteratorElements(TextFile textFile, Pattern p) throws IOException {
			this.textFile = textFile;
			this.pattern = p;
			next = textFile.readLineElemsReturnObjects(this.pattern);
		}

		@Override
		public boolean hasNext() {
			return next != null;
		}

		@Override
		public String[] next() {
			String[] current = next;
			try {
				next = textFile.readLineElemsReturnObjects(this.pattern);
			} catch (IOException ex) {
				throw new RuntimeException(ex);
			}
			return current;
		}

		@Override
		public void remove() {
			throw new UnsupportedOperationException("Not supported yet.");
		}
	}
	
	private static class TextFileIterableElements implements Iterable {

		private final TextFile textFile;
		private final Pattern pattern;

		public TextFileIterableElements(TextFile textFile, Pattern pattern) {
			this.textFile = textFile;
			this.pattern = pattern;
		}
		
		@Override
		public Iterator iterator() {
			try {
				return textFile.readLineElemsIterator(pattern);
			} catch (IOException ex) {
				throw new RuntimeException(ex);
			}
		}
		
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy