All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Downloading a project requires significant resources. Please understand that we need to cover our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
org.geneweaver.io.reader.ReaderFactory Maven / Gradle / Ivy
/*-
*
* Copyright 2018, 2020 The Jackson Laboratory Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @author Matthew Gerring
*/
package org.geneweaver.io.reader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
/**
* Simple factory for getting reader by file extension.
*
* @author gerrim
*
*/
public class ReaderFactory {
/** The Constant classes. */
private static final Map classes;
static {
Map tmp = new LinkedHashMap<>();
// These guys are fairly standard I think.
tmp.put("gtf", GeneReader.class);
tmp.put("gvf", VariantReader.class);
tmp.put("vcf", FastVCFReader.class);
tmp.put("bed", BedReader.class);
tmp.put("gff", RegulatoryFeatureReader.class);
tmp.put("step", StepReader.class);
// If there are multiple xls formats, we will have to ask
// if it is applicable for a given format and reader request.
tmp.put("xls", ChiapetReader.class);
// If there are multiple tsv formats, we will have to ask
// if it is applicable for a given format and reader request.
tmp.put("tsv", Arrays.asList(Fantom5EnsemblMapReader.class, MapCSVReader.class));
tmp.put("txt", MapCSVReader.class);
// If there are multiple rpt formats, we will have to ask
// if it is applicable for a given format and reader request.
tmp.put("rpt", HomologGeneReader.class);
// This one is for the jax csv files which are parsed out of mouse eQTL data.
tmp.put(Pattern.compile("^.+\\_balyor\\.csv(\\.gz)?$"), OrthologBaylorReader.class);
tmp.put("csv", Arrays.asList(JaxEQTLReader.class, MapCSVReader.class, JaxIntervalEQTLReader.class));
// These eQTLs are from this paper: https://www.biorxiv.org/content/10.1101/655670v1
// And these files: https://zenodo.org/record/3408356#.YQljwlNKii6
tmp.put(Pattern.compile("^(.+)_.+_eQTLs.txt(\\.gz)?$"), FlexEQTLReader.class);
// @see https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/README_eQTL_v8.txt
tmp.put(Pattern.compile("^.+\\.egenes\\.txt(\\.gz)?$"), GTExEQTLReader.class);
tmp.put(Pattern.compile("^.+\\.sgenes\\.txt(\\.gz)?$"), GTExEQTLReader.class);
tmp.put(Pattern.compile("^.+\\.signif_variant_gene_pairs\\.txt(\\.gz)?$"), GTExEQTLReader.class);
tmp.put(Pattern.compile("^.+\\.sqtl_signifpairs\\.txt(\\.gz)?$"), GTExEQTLReader.class);
tmp.put(Pattern.compile("^.+\\.allpairs\\.txt(\\.gz)?$"), GTExEQTLReader.class);
tmp.put(Pattern.compile("^.+\\.sqtl_allpairs\\.txt(\\.gz)?$"), GTExEQTLReader.class);
// This is read directly into a database in EQTLFunction
//tmp.put("^.+\\.lookup_table\\.txt(\\.gz)?$", GTExEQTLReader.class);
tmp.put(Pattern.compile("^GTEx.+Annotations.+Sample.+.txt(\\.gz)?$"), GTExSampleReader.class);
// Archive Reader just calls back this reader with each entry
tmp.put("tar", ArchiveReader.class);
tmp.put("zip", ArchiveReader.class);
classes = Collections.unmodifiableMap(tmp);
}
/**
* Get a reader using the file extension to find the correct one.
*
* @param the generic type
* @param species the species
* @param file the file
* @return the reader
* @throws ReaderException the reader exception
*/
public static , T> R getReader(ReaderRequest request) throws ReaderException {
Class clazz = getClass(request);
try {
Constructor constructor = clazz.getDeclaredConstructor();
R instance = constructor.newInstance();
if (request.isInitRequired()) {
Method init = clazz.getMethod(StreamReader.INIT, ReaderRequest.class);
init.invoke(instance, request);
}
return instance;
} catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException
| NoSuchMethodException | SecurityException e) {
throw new ReaderException(e);
}
}
/**
* Gets the class.
*
* @param the generic type
* @param name the name
* @return the class
* @throws ReaderException the reader exception
*/
private static , T> Class getClass(ReaderRequest request) throws ReaderException {
// Figure out reader from name. Later we may need more complex logic.
Class clazz = getClassByName(request);
if (clazz!=null) return clazz;
throw new ReaderException("There is no reader for "+request.name());
}
/**
* Check if a given reader request would result in a valid reader class.
* @param request
* @return true if we have a reader!
* @throws ReaderException
*/
public static boolean isSupported(ReaderRequest request) throws ReaderException {
Class> clazz = getClassByName(request);
return clazz!=null;
}
@SuppressWarnings("unchecked")
private static , T> Class getClassByName(ReaderRequest request) throws ReaderException{
String name = request.name();
// Unfortunately we have to loop here because files with the
// same extension can have different readers, e.g. txt, csv.
Object found = null;
// Process the patterns first, all of them
for (Object key : classes.keySet()) {
if (key instanceof Pattern) {
Pattern pattern = (Pattern)key;
Matcher matcher = pattern.matcher(name);
if (matcher.matches()) {
request.setMatcher(matcher);
found = classes.get(key);
break;
}
}
}
// Process the direct keys
if (found==null) for (Object key : classes.keySet()) {
if (key instanceof String) {
String ext = FilenameUtils.getExtension(name);
if ("gz".equals(ext)) {
ext = FilenameUtils.getExtension(name.substring(0, name.length()-3));
}
if (ext==null) throw new ReaderException(name+" does not have an extension!");
ext = ext.toLowerCase();
if (key.toString().toLowerCase().equals(ext)) {
found = classes.get(ext);
break;
}
}
}
if (found!=null) {
if (found instanceof Class) {
return (Class)found;
} else if (found instanceof Collection) {
if (request.getReaderHint()==null) {
return (Class)((Collection)found).iterator().next();
} else {
String hint = request.getReaderHint();
for (Iterator> it = ((Collection>)found).iterator(); it.hasNext();) {
Class clazz = it.next();
if (clazz.getName().contains(hint)) return clazz;
}
}
}
}
return null;
}
}