All downloads are free. The search and download functionality uses the official Maven repository.

org.rcsb.mmtf.spark.mappers.GenerateSegments Maven / Gradle / Ivy

Go to download

MMTF Spark is a series of libraries and functions for using MMTF with Spark.

There is a newer version: 0.0.8
Show newest version
package org.rcsb.mmtf.spark.mappers;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.vecmath.Point3d;

import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.rcsb.mmtf.api.StructureDataInterface;
import org.rcsb.mmtf.spark.data.Segment;

import scala.Tuple2;

/**
 * A mapper from a {@link StructureDataInterface} to keyed {@link Segment}s, each built from a
 * sequence string and the {@link Point3d}[] of the corresponding calpha coordinates.
 * @author Anthony Bradley
 *
 */
public class GenerateSegments implements PairFlatMapFunction,String, Segment> {

	/** Define the length of a fragment. Null implies each chain is a fragment */
	private Integer fragmentLength;
	
	/**
	 * Constructor of the class.
	 * @param fragmentLength the length of each fragment. Null means take each chain
	 * as a single fragment.
	 */
	public GenerateSegments(Integer fragmentLength) {
		this.fragmentLength = fragmentLength;
	}
	
	/**
	 * The serial id for this version of the class.
	 */
	private static final long serialVersionUID = -1187474691802866518L;

	/**
	 * Generate the keyed {@link Segment}s for a single structure.
	 * <p>
	 * When {@code fragmentLength} is null and {@code fragList} is not empty, an entry
	 * keyed by {@code chainId} is added that holds {@code sequence} and the contents of
	 * {@code fragList}. Otherwise the visible code builds segments from arrays of
	 * {@code fragmentLength} points under keys made of {@code chainId} followed by a
	 * running fragment counter.
	 * <p>
	 * NOTE(review): this listing appears to have lost every span between a less-than
	 * sign and the next greater-than sign (generic type arguments, plus the headers and
	 * conditions of the two {@code for} loops), so the method is incomplete as shown.
	 * Restore it from the original source before changing any logic.
	 *
	 * @param t the input pair; its second element is the {@link StructureDataInterface} to process
	 * @return the generated (key, {@link Segment}) pairs
	 * @throws Exception as declared in the method signature
	 */
	@Override
	public Iterable> call(Tuple2 t) throws Exception {
		// The structure to process is the second element of the input pair
		StructureDataInterface structureDataInterface = t._2;
		// Accumulates every (key, Segment) pair that is returned at the end
		List> outList = new ArrayList<>();
		// Get the PDB id
		String pdbId = structureDataInterface.getStructureId();
		// Lookup built by getChainEntity: chain index to entity type
		Map chainIndexToEntityTypeMap = getChainEntity(structureDataInterface);
		// NOTE(review): the statements that advance these two counters are not visible in this listing
		int atomCounter = 0;
		int groupCounter = 0;
		// Now loop through the entities
		// NOTE(review): the loop header and everything up to the fragList declaration is missing on the next line
		for(int i=0; i fragList = new ArrayList<>();
				// The sequence string is built up alongside fragList, starting empty
				String sequence = "";
				// NOTE(review): the group loop header, its body and the start of the outList.add(...) call are missing on the next line
				for(int groupId=0; groupId(chainId+fragCounter, 
								new Segment(sequence, fragList.toArray(new Point3d[fragmentLength]))));
						// NOTE(review): this removes the point at index fragmentLength-1, whereas the next
						// statement drops the FIRST character of the sequence - confirm that the points
						// and the sequence are meant to stay aligned (remove(0) would drop the first point)
						fragList.remove(fragmentLength-1);
						// Drop the first character of the sequence
						sequence = sequence.substring(1, sequence.length());
						fragCounter++;
					}
				}
				// No fragment length given: emit everything collected as one segment keyed by the chain id
				if (fragmentLength==null && fragList.size()!=0) {
					outList.add(new Tuple2(chainId, 
							new Segment(sequence, fragList.toArray(new Point3d[fragList.size()]))));
				}
				
			}
		}
		return outList;
	}


	/**
	 * Get a map of chain index to the entity type.
	 * @param structureDataInterface the input {@link StructureDataInterface}
	 * @return the map of chain indices to the entity type
	 */
	private Map getChainEntity(StructureDataInterface structureDataInterface) {
		Map outMap = new HashMap<>();
		for(int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy