org.geneweaver.io.reader.JaxIntervalEQTLReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gweaver-stream-io Show documentation
Show all versions of gweaver-stream-io Show documentation
The IO bundle for Geneweaver.
package org.geneweaver.io.reader;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.commons.beanutils.BeanMap;
import org.apache.commons.beanutils.BeanUtils;
import org.geneweaver.domain.EQTL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Reads the mouse eQTL files derived from the work of Hao He.
* Processing the eQTL rdata files produces an EQTL csv file,
* which this reader parses in order to generate eQTL links
* in the graph.
* @author gerrim
*
*/
class JaxIntervalEQTLReader extends LineIteratorReader {
/** Logger shared by every instance of this reader. */
private static final Logger logger = LoggerFactory.getLogger(JaxIntervalEQTLReader.class);

/**
 * Initialises the reader from the request and fixes the column
 * delimiter to a comma.
 *
 * @param request the request handed on to the parent class' {@code setup}
 * @return this reader, so that calls can be chained
 * @throws ReaderException declared by the signature; presumably raised by
 *         the parent's {@code setup} - confirm against the parent class
 */
@SuppressWarnings("unchecked")
@Override
public JaxIntervalEQTLReader init(ReaderRequest request) throws ReaderException {
    super.setup(request);
    setDelimiter(","); // Must be a , only
    return this;
}
private List headerNames;
private Map headerValues;
private List ignoredColumns = Arrays.asList("1p5lod", "inpaper");
/**
* Creates the.
*
* @param line the line
* @return the n
* @throws ReaderException the reader exception
*/
@Override
protected EQTL create(String line) throws ReaderException {
if (headerNames==null || headerValues==null) {
parseHeaders();
}
EQTL bean = new EQTL();
bean.setTissueFileName(request.name());
bean.setStudyId(createFudgedStudyId(request.name(), headerValues));
bean.setType(EQTL.Type.INTERVAL);
BeanMap d = new BeanMap(bean);
String[] values = line.split(getDelimiter());
if (values.length!=headerNames.size()) {
throw new ReaderException("There are a different number of headers and values!");
}
for(int i=0;i type = d.getType(name);
if ("NA".equals(value) && Number.class.isAssignableFrom(type)) {
value = "0";
}
if (Integer.class.equals(type)) {
value = Double.valueOf(value.toString()).intValue();
} else if (Double.class.equals(type)) {
value = Double.valueOf(value.toString());
}
try {
if (value.toString().length()<1) continue;
d.put(name, value);
} catch (NumberFormatException ne) {
logger.info("The property '"+name+"' cannot have value: "+values[i]);
continue;
} catch (IllegalArgumentException ie) {
throw new ReaderException("Field "+name+" has type "+type+" which has not been parsed from "+value);
}
}
headerValues.forEach((k,v)->{
d.put(k,v);
});
return bean;
}
/**
 * Chooses the study id for a bean.
 *
 * The "studyId" header value is used when there is one, otherwise the id
 * is "Project:" followed by the Base64 encoding of the UTF-8 bytes of the
 * name.
 *
 * @param name the name to encode when no study id was declared, may be null
 * @param headerValues the parsed header values
 * @return the study id or null if the name is null
 * @throws ReaderException kept in the signature for callers; not thrown by this body
 */
private String createFudgedStudyId(String name, Map headerValues) throws ReaderException {
    if (name == null) return null;

    Object declared = headerValues.get("studyId");
    if (declared != null) return declared.toString();

    // The charset constant cannot fail, unlike a lookup by charset name.
    return "Project:"+Base64.getEncoder().encodeToString(name.getBytes(StandardCharsets.UTF_8));
}
/** Accepts header dates written as month/day/year. */
private final DateFormat format1 = new SimpleDateFormat("MM/dd/yyyy");
/** Accepts header dates written as year-month-day. */
private final DateFormat format2 = new SimpleDateFormat("yyyy-MM-dd");

/**
 * Parses the header lines into column names and header values.
 *
 * The last header line holds the delimited column names. Every other
 * header line is read as {@code name: value}. Names have underscores and
 * spaces removed and a few of them are renamed so that they match the
 * field names used for eQTLs. The "species" line is dropped and the "date"
 * line is only checked for a readable date, it is not stored.
 *
 * The first character of every header line is dropped (presumably a
 * comment marker - confirm against the files being read).
 *
 * @return the header values, which are also kept in this reader
 * @throws ReaderException if there is no header, a header line has no
 *         value or the date cannot be parsed
 */
@Override
protected Map parseHeaders() throws ReaderException {
    if (header==null || header.isEmpty()) {
        throw new ReaderException("JAX eQTL files must have a header!");
    }

    // Header names
    // Something like: marker,chr,bp_mm10,rs_id,gene_id
    String headLine = header.get(header.size()-1);
    String[] names = headLine.substring(1).split(getDelimiter());
    headerNames = new ArrayList<>();
    for (String rawName : names) {
        String name = rawName.replace("_", "").replace(" ", "");
        // TODO lod
        // Locale.ROOT keeps the match independent of the default locale.
        headerNames.add(canonicalFieldName(name.toLowerCase(Locale.ROOT), name));
    }

    // Header values
    headerValues = new HashMap<>();
    for (int i = 0; i < header.size()-1; i++) {
        String hline = header.get(i).substring(1);
        // Split on the first ':' only, the value may itself contain one (e.g. a url).
        String[] kvs = hline.split(":", 2);
        if (kvs.length < 2) {
            throw new ReaderException("Header line is not of the form 'name: value': "+hline);
        }
        String name = kvs[0].toLowerCase(Locale.ROOT).trim().replace("_", "").replace(" ", "");
        Object value = kvs[1].trim();

        if (name.equals("species")) continue; // Repeated information
        if (name.equals("date")) {
            requireParsableDate(value.toString());
            continue; // We do not repeat date
        }

        // Make all eQTLs have same field names, even if from human data or mouse data.
        switch (name) {
            case "ensembl.version":
                name = "version";
                break;
            case "tissue":
                name = "tissueName";
                value = value.toString().toLowerCase(Locale.ROOT);
                break;
            case "studyid":
                name = "studyId";
                break;
            case "url":
                name = "source";
                break;
            default:
                name = canonicalFieldName(name, name);
                break;
        }
        headerValues.put(name, value);
    }
    return headerValues;
}

/** Maps a lower case column name to its eQTL field name, or returns the fallback. */
private static String canonicalFieldName(String lowerCaseName, String fallback) {
    switch (lowerCaseName) {
        case "rsid":
        case "rid":
            return "rsId";
        case "geneid":
            return "geneId";
        case "genename":
            return "geneName";
        case "bpmm10":
            return "bp";
        default:
            return fallback;
    }
}

/** Fails with a ReaderException unless the text matches one of the two date formats. */
private void requireParsableDate(String text) throws ReaderException {
    try {
        format1.parse(text);
    } catch (ParseException e) {
        try {
            format2.parse(text);
        } catch (ParseException eOther) {
            throw new ReaderException("Cannot parse date: "+text);
        }
    }
}
/**
 * Hands another header line to the parent class and discards the cached
 * header names and values, so that they are parsed again the next time a
 * line is turned into a bean.
 *
 * @param line the header line to add
 */
@Override
protected void addHeader(String line) {
    headerNames = null;
    headerValues = null;
    super.addHeader(line);
}
/**
 * Streams the parent's items, passing each one through {@code expand},
 * which may replace a single item by several.
 *
 * This stream must not be used in parallel stream programming.
 * Use forkJoinStream() instead.
 *
 * @return the stream
 */
@Override
public Stream stream() {
    return super.stream()
            .flatMap(n->expand(n));
}
/**
 * Splits an eQTL whose rs id holds several ids into one eQTL per id.
 *
 * An rs id is split when it starts with "rs" and contains a space; the
 * parts are separated on runs of whitespace and each one gets its own copy
 * of the eQTL. In every other case, including a null eQTL or a null rs id,
 * the eQTL is returned unchanged as the only item.
 *
 * @param n the eQTL to expand
 * @return the stream of resulting eQTLs
 */
private Stream expand(EQTL n) {
    if (n == null) return Stream.of(n);

    String rid = n.getRsId();
    // No rs id means that there is nothing to split.
    if (rid == null) return Stream.of(n);

    if (rid.startsWith("rs") && rid.indexOf(' ')>-1) {
        String[] rids = rid.split("\\s+");
        return Stream.of(rids).map(r->clone(r, n));
    }
    return Stream.of(n);
}
/**
 * Copies the properties of an eQTL to a new instance and gives the copy
 * the rs id provided.
 *
 * @param rsId the rs id of the copy
 * @param eqtl the eQTL to copy the properties from
 * @return the copy
 * @throws IllegalArgumentException if the properties cannot be copied; the
 *         original failure is attached as the cause
 */
private EQTL clone(String rsId, EQTL eqtl) {
    EQTL ret = new EQTL();
    try {
        BeanUtils.copyProperties(ret, eqtl);
    } catch (IllegalAccessException | InvocationTargetException e) {
        // Keep the cause, otherwise the reason for the failure is lost.
        throw new IllegalArgumentException("Cannot copy properties of EQTL for variant "+rsId, e);
    }
    ret.setRsId(rsId);
    return ret;
}
}