de.julielab.genemapper.resources.BioThesaurusIproClassMerger Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of gene-mapper-resources Show documentation
This project assembles code and files required to build the dictionaries and indexes used by the JCoRe Gene Mapper.
The newest version!
/** 
 * BioThesaurusIproClassMerger.java
 * 
 * Copyright (c) 2007, JULIE Lab. 
 * All rights reserved. This program and the accompanying materials 
 * are made available under the terms of the Common Public License v1.0 
 *
 * Author: kampe
 * 
 * Current version: 1.3 	
 * Since version:   1.3
 *
 * Creation date: Aug 13, 2007 
 * 
 * Merges IproClass columns UniRef_90, UniRef_50 & Taxon ID into
 * BioThesaurus.
 **/

package de.julielab.genemapper.resources;

import java.io.*;
import java.util.HashMap;

/**
 * Command-line tool that appends four columns taken from an iProClass file to
 * the rows of a BioThesaurus file.
 * <p>
 * Both files are read as tab-separated text and joined on their first column.
 * BioThesaurus rows whose first column has no iProClass entry are NOT written
 * to the output file.
 * <p>
 * NOTE(review): the file header names the appended columns as UniRef_90,
 * UniRef_50 and Taxon ID, but four columns are appended; confirm the meaning
 * of each column index against the iProClass format description.
 */
public class BioThesaurusIproClassMerger {

	/** Columns picked from an iProClass file that still contains all of its columns. */
	private static final int[] FULL_FILE_COLUMNS = { 1, 11, 12, 15 };

	/** Columns picked from an iProClass file that was reduced to the relevant columns. */
	private static final int[] REDUCED_FILE_COLUMNS = { 1, 2, 3, 4 };

	/** Maps the first iProClass column (the join key) to the values appended for it. */
	HashMap<String, String[]> iproCols;

	BioThesaurusIproClassMerger() {
		this.iproCols = new HashMap<>();
	}

	/**
	 * Reads the iProClass file into {@link #iproCols}.
	 * <p>
	 * Rows with fewer columns than required are skipped and counted; the count
	 * is reported on standard error once the file has been read. If the same
	 * key occurs more than once, the last row wins.
	 * 
	 * @param iproFile
	 *            the iproclass file
	 * @param allColumnsRead
	 *            whether iproclass file contains all columns or just the
	 *            relevant ones (used to save disk space and speed up merging)
	 * @throws IOException
	 *             if the file cannot be opened or read
	 */
	private void importIproClass(File iproFile, boolean allColumnsRead) throws IOException {
		int[] columns = allColumnsRead ? FULL_FILE_COLUMNS : REDUCED_FILE_COLUMNS;
		// The column arrays are sorted ascending, so the last index is the largest.
		int requiredColumns = columns[columns.length - 1] + 1;

		try (BufferedReader iproReader = new BufferedReader(new FileReader(iproFile))) {
			String line;
			int counter = 1;
			int skipped = 0;
			System.out.println("Importing lines from IproClass!");
			while ((line = iproReader.readLine()) != null) {
				// Limit -1 keeps trailing empty columns; the default split drops
				// them, which made rows with an empty last column look too short.
				String[] split = line.split("\t", -1);
				if (split.length >= requiredColumns) {
					String[] values = new String[columns.length];
					for (int i = 0; i < columns.length; i++) {
						// Values are interned so that equal strings share one instance.
						values[i] = split[columns[i]].intern();
					}
					iproCols.put(split[0], values);
				} else {
					++skipped;
				}

				if (counter % 100000 == 0) {
					System.out.println(counter);
				}
				++counter;
			}
			System.out.println("iproclass: " + iproCols.size()
					+ " lines imported!");
			if (skipped > 0) {
				System.err.println("iproclass: skipped " + skipped
						+ " lines with fewer than " + requiredColumns + " columns");
			}
		}
	}

	/**
	 * Appends the imported iProClass values to the BioThesaurus rows and writes
	 * the result to the output file.
	 * <p>
	 * Only rows whose first column is a key in {@link #iproCols} are written;
	 * every written row is terminated with a single {@code '\n'}.
	 * 
	 * @param btFile
	 *            the BioThesaurus file to read
	 * @param outputFile
	 *            path of the file to write the merged rows to
	 * @throws IOException
	 *             if reading the input or writing the output fails
	 */
	private void appendCols(File btFile, String outputFile) throws IOException {
		try (BufferedReader bioReader = new BufferedReader(new FileReader(btFile));
				BufferedWriter bioWriter = new BufferedWriter(new FileWriter(outputFile))) {

			String line;
			System.out.println("Appending columns to BioThesaurus: ");
			int counter = 1;
			while ((line = bioReader.readLine()) != null) {
				// Only the first column is needed, so avoid splitting the whole row.
				int firstTab = line.indexOf('\t');
				String id = firstTab < 0 ? line : line.substring(0, firstTab);

				String[] ipro = iproCols.get(id);
				if (ipro != null) {
					bioWriter.write(line);
					for (String value : ipro) {
						bioWriter.write('\t');
						bioWriter.write(value);
					}
					bioWriter.write('\n');
				}
				if (counter % 10000 == 0) {
					System.out.println(counter);
				}
				++counter;
			}
		}
		// Reported only after both streams were closed (and thus flushed) successfully.
		System.out.println("Finished merging!");
	}

	/**
	 * Runs the merge.
	 * 
	 * @param args
	 *            exactly four arguments: the BioThesaurus file, the iProClass
	 *            file, the output file and a boolean ({@code true} if the
	 *            iProClass file contains all columns, anything else if it
	 *            was reduced to the relevant ones)
	 */
	public static void main(String[] args) {
		if (args.length != 4) {
			System.err
					.println("Too many/few arguments\nUsage:    BioThesaurusIproClassMerger"
							+ " <bioThesaurusFile> <iproClassFile> <outputFile> <allColumns: true|false>");
			return;
		}

		File btFile = new File(args[0]);
		File iproFile = new File(args[1]);
		String outputFile = args[2];
		boolean allCols = Boolean.parseBoolean(args[3]);
		if (!btFile.isFile() || !iproFile.isFile()) {
			System.err.println("Could not find one or both files!");
			return;
		}

		BioThesaurusIproClassMerger merger = new BioThesaurusIproClassMerger();
		try {
			merger.importIproClass(iproFile, allCols);
			// Not reached if the import failed, so no output is produced from a
			// missing or partially read iProClass table.
			merger.appendCols(btFile, outputFile);
		} catch (IOException io) {
			System.err.println("Merging failed: " + io.getMessage());
			io.printStackTrace();
		}
	}

}