All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.geneweaver.io.reader.JaxIntervalEQTLReader Maven / Gradle / Ivy

There is a newer version: 2.7.12
Show newest version
package org.geneweaver.io.reader;

import java.io.UnsupportedEncodingException;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

import org.apache.commons.beanutils.BeanMap;
import org.apache.commons.beanutils.BeanUtils;
import org.geneweaver.domain.EQTL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads the eQTL files for mouse that were produced from the work
 * by Hao He. The processing of the eQTL rdata files produces an
 * EQTL csv file, which this reader reads in order to generate
 * eQTL links in the graph.
 *
 * @author gerrim
 */
class JaxIntervalEQTLReader extends LineIteratorReader {
	
	/** Logger shared by all instances of this reader; never reassigned. */
	private static final Logger logger = LoggerFactory.getLogger(JaxIntervalEQTLReader.class);

	/**
	 * Prepares this reader for the given request and fixes the
	 * column delimiter to a single comma.
	 *
	 * @param request the request handed on to the superclass setup
	 * @return this reader, so that calls can be chained
	 * @throws ReaderException declared by the signature; presumably raised by the superclass setup
	 */
	@SuppressWarnings("unchecked")
	@Override
	public JaxIntervalEQTLReader init(ReaderRequest request) throws ReaderException {
		final String commaOnly = ","; // Must be a , only
		super.setup(request);
		setDelimiter(commaOnly);
		return this;
	}

	/**
	 * Column names taken from the last header line, in column order.
	 * Null until parseHeaders() has run; reset to null by addHeader(String).
	 */
	private List<String> headerNames;

	/**
	 * Name/value pairs taken from the header lines above the column names.
	 * Null until parseHeaders() has run; reset to null by addHeader(String).
	 */
	private Map<String, Object> headerValues;

	/**
	 * Column names that are not wanted on the bean.
	 * NOTE(review): presumably consulted while populating the bean in
	 * create(String) - confirm against the row parsing loop.
	 */
	private final List<String> ignoredColumns = Arrays.asList("1p5lod", "inpaper");
	/**
	 * Creates the.
	 *
	 * @param line the line
	 * @return the n
	 * @throws ReaderException the reader exception
	 */
	@Override
	protected EQTL create(String line) throws ReaderException {

		if (headerNames==null || headerValues==null) {
			parseHeaders();
		}
		
		EQTL bean = new EQTL();
		bean.setTissueFileName(request.name());
		bean.setStudyId(createFudgedStudyId(request.name(), headerValues));
		bean.setType(EQTL.Type.INTERVAL);
		BeanMap d = new BeanMap(bean);
		
		String[] values = line.split(getDelimiter());
		
		if (values.length!=headerNames.size()) {
			throw new ReaderException("There are a different number of headers and values!");
		}
		
		for(int i=0;i type = d.getType(name);
			if ("NA".equals(value) && Number.class.isAssignableFrom(type)) {
				value = "0";
			}
			if (Integer.class.equals(type)) {
				value = Double.valueOf(value.toString()).intValue();
			} else if (Double.class.equals(type)) {
				value = Double.valueOf(value.toString());
			}
			
			try {
				if (value.toString().length()<1) continue;
				d.put(name, value);
			} catch (NumberFormatException ne) {
				logger.info("The property '"+name+"' cannot have value: "+values[i]);
				continue;
			} catch (IllegalArgumentException ie) {
				throw new ReaderException("Field "+name+" has type "+type+" which has not been parsed from "+value);
			}
		}
		
		headerValues.forEach((k,v)->{
			d.put(k,v);
		});
		
		return bean;
	}

	/**
	 * Works out a study id for the records of a file.
	 *
	 * A "studyId" entry in the header values wins. Otherwise the id is
	 * "Project:" followed by the Base64 encoding of the UTF-8 bytes of
	 * the given name.
	 *
	 * @param name the name to derive an id from, may be null
	 * @param headerValues the parsed header name/value pairs
	 * @return the study id, or null if name is null
	 * @throws ReaderException kept in the signature for existing callers;
	 *         no longer raised because the charset is now passed as a constant
	 */
	private String createFudgedStudyId(String name, Map headerValues) throws ReaderException {
		if (name == null) return null;
		// A key that is present but maps to null falls through to the generated id.
		Object declaredId = headerValues.get("studyId");
		if (declaredId != null) return declaredId.toString();
		return "Project:"+Base64.getEncoder().encodeToString(name.getBytes(StandardCharsets.UTF_8));
	}

	// Date patterns tried when parseHeaders() meets a header entry named "date":
	// format1 first, format2 only if format1 fails to parse the value.
	private DateFormat format1 = new SimpleDateFormat("MM/dd/yyyy");
	private DateFormat format2 = new SimpleDateFormat("yyyy-MM-dd");
	
	/**
	 * Parses the collected header lines into column names and header values.
	 *
	 * The last header line holds the delimited column names. Underscores
	 * and spaces are removed from each name and a few known names are
	 * mapped onto bean property names (for example rs_id becomes rsId).
	 *
	 * Every earlier header line is read as "name: value". Only the first
	 * colon separates name and value, so values such as URLs keep their
	 * own colons. The "species" entry is dropped, and the "date" entry is
	 * checked against the two supported date patterns and then dropped.
	 *
	 * NOTE(review): the first character of every header line is discarded;
	 * presumably it is a comment marker - confirm against the file format.
	 *
	 * @return the header values, which are also cached on this reader
	 * @throws ReaderException if there is no header, if a header line has
	 *         no colon, or if the date entry cannot be parsed
	 */
	@Override
	protected Map parseHeaders() throws ReaderException {
		
		if (header==null || header.isEmpty()) {
			throw new ReaderException("JAX eQTL files must have a header!");
		}
		
		// Header names
		String headLine = header.get(header.size()-1);
		// Guarded so that an empty line does not raise StringIndexOutOfBoundsException.
		String namesPart = headLine.isEmpty() ? headLine : headLine.substring(1);
		String[] names = namesPart.split(getDelimiter());
		
		// Something like: marker,chr,bp_mm10,rs_id,gene_id
		headerNames = new ArrayList<>();
		for (String rawName : names) {
			String name = rawName.replace("_", "").replace(" ", "");
			switch (name.toLowerCase()) {
				case "rsid":
				case "rid":
					name = "rsId";
					break;
				case "geneid":
					name = "geneId";
					break;
				case "genename":
					name = "geneName";
					break;
				case "bpmm10":
					name = "bp";
					break;
				default:
					// Any other column keeps its own spelling and case.
					break;
			}
			// TODO lod
			headerNames.add(name);
		}
		
		// Header values
		headerValues = new HashMap<>();
		for (int i = 0; i < header.size()-1; i++) {
			String rawLine = header.get(i);
			String hline = rawLine.isEmpty() ? rawLine : rawLine.substring(1);
			
			// Limit of 2: split on the first colon only, the value may contain more.
			String[] kvs = hline.split(":", 2);
			if (kvs.length < 2) {
				throw new ReaderException("Header line is not of the form 'name: value': "+rawLine);
			}
			
			String name = kvs[0].toLowerCase().trim();
			name = name.replace("_", "");
			name = name.replace(" ", "");
			Object value = kvs[1].trim();
			
			// Make all eQTLs have same field names, even if from human data or mouse data.
			switch (name) {
				case "ensembl.version":
					name = "version";
					break;
				case "tissue":
					name = "tissueName";
					value = value.toString().toLowerCase();
					break;
				case "studyid":
					name = "studyId";
					break;
				case "rsid":
				case "rid":
					name = "rsId";
					break;
				case "geneid":
					name = "geneId";
					break;
				case "genename":
					name = "geneName";
					break;
				case "bpmm10":
					name = "bp";
					break;
				case "url":
					name = "source";
					break;
				default:
					break;
			}
			
			if (name.equals("species")) continue; // Repeated information
			
			if (name.equals("date")) {
				// The date is only validated; the parsed value is not kept.
				try {
					format1.parse(value.toString());
				} catch (ParseException e) {
					try {
						format2.parse(value.toString());
					} catch (ParseException eOther) {
						throw new ReaderException("Cannot parse date: "+value);
					}
				}
				continue;// We do not repeat date
			}
			headerValues.put(name, value);
		}
		return headerValues;
	}

	/**
	 * Records another header line.
	 *
	 * The cached column names and header values are cleared first, so the
	 * next call to create(String) parses the headers again and sees the
	 * new line. The line itself is stored by the superclass.
	 *
	 * @param line the header line to add
	 */
	protected void addHeader(String line) {
		headerNames = null;
		headerValues = null;
		super.addHeader(line);
	}

	/**
	 * Streams the records of the superclass stream, passing each one
	 * through expand so that one record may become several.
	 *
	 * This stream must not be used in parallel stream programming.
	 * Use forkJoinStream() instead.
	 *
	 * @return the stream
	 */
	public Stream stream() {
		return super.stream().flatMap(this::expand);
	}

	/**
	 * Splits a record that carries several rs ids into one record per id.
	 *
	 * A record is split only when its rs id starts with "rs" and contains
	 * a space; the id is then cut on whitespace and the record is copied
	 * once per piece. Any other record, including one without an rs id,
	 * is passed through unchanged.
	 *
	 * @param n the record to expand
	 * @return a stream of one or more records
	 */
	private Stream expand(EQTL n) {
		if (!(n instanceof EQTL)) return Stream.of(n);
		EQTL eqtl = (EQTL)n;
		String rid = eqtl.getRsId();
		// Records without an rs id have nothing to split; avoids a NullPointerException.
		if (rid == null) return Stream.of(n);
		if (rid.startsWith("rs") && rid.indexOf(' ')>-1) {
			String[] rids = rid.split("\\s+");
			return Stream.of(rids).map(r->clone(r, eqtl));
		}
		return Stream.of(n);
	}

	/**
	 * Copies an EQTL and gives the copy a different rs id.
	 *
	 * @param rsId the rs id to set on the copy
	 * @param eqtl the record whose properties are copied
	 * @return a new EQTL with the properties of eqtl and the given rs id
	 * @throws IllegalArgumentException if the properties cannot be copied;
	 *         the underlying exception is attached as the cause
	 */
	private EQTL clone(String rsId, EQTL eqtl) {
		EQTL ret = new EQTL();
		try {
			BeanUtils.copyProperties(ret, eqtl);
		} catch (IllegalAccessException | InvocationTargetException e) {
			// Keep the cause so the real reason for the failure is not lost.
			throw new IllegalArgumentException("Cannot copy properties of EQTL for variant "+rsId, e);
		}
		ret.setRsId(rsId);
		return ret;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy