All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.berkeley.nlp.mapper.Mapper Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.mapper;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Utility class for parallelizing the processing of a collection of items. To use this you must
 * extend {@code MapWorker<Item>}, which will process each instance of Item. When you call
 * doMapping(Iterator, bufSize), we take bufSize elements out of the iterator and distribute the
 * processing of those items, then take bufSize more, and so on.
 * 
 * @author aria42
 *
 * @param <Item> the type of item handed to the workers
 */
public class Mapper {

	private int numWorkers ;
	private MapWorkerFactory factory;

	public Mapper(MapWorkerFactory factory) {
		this.factory = factory;
		this.numWorkers = Runtime.getRuntime().availableProcessors();
	}

  public Mapper(final Class c) {
    this(new MapWorkerFactory() {
      public MapWorker newMapWorker() {
        try {
          return (MapWorker) c.newInstance();
        } catch (Exception e) {
          e.printStackTrace();
        }
        return null;
      }
    });
  }

	public void setNumWorkers(int numWorkers) {
		this.numWorkers = numWorkers;	
	}


	public List> doMapping(List items) {

		List> workers = new ArrayList>();
		for (int i=0; i < numWorkers; ++i) {
			MapWorker worker = factory.newMapWorker();
			workers.add(worker);
		}
		doMapping(items, workers);
		return workers;
	}

	private void doMapping(List items, List> workers) {
		ExecutorService executor = Executors.newFixedThreadPool(workers.size());
		for (int i=0; i < workers.size(); ++i) {
			int start = (int) ((i/(double) workers.size()) * items.size());
			int end = (int) (((i+1)/(double) workers.size()) * items.size());
			List localItems = items.subList(start, end);
			MapWorker worker = workers.get(i);
			worker.setItems(localItems);
			executor.execute(worker);
		}
		execute(executor);
    for (MapWorker worker : workers) {
      worker.reduce();
    }
	}

	private void execute(ExecutorService executor) {
		executor.shutdown();
		try {
			executor.awaitTermination(10000, TimeUnit.SECONDS);
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
	}

	public List> doMapping(Iterator itemIt) { 
		return doMapping(itemIt, 10000);
	}

	public List> doMapping(Iterator itemIt, int bufSize) {
		List> workers = new ArrayList>();
    int numProcessed = 0;
		for (int i=0; i < numWorkers; ++i) {
			MapWorker worker = factory.newMapWorker();
			workers.add(worker);
		}
		while (itemIt.hasNext()) {			
			List items = new ArrayList();
			for (int i=0; i < bufSize; ++i) {
				if (!itemIt.hasNext())  break;
				items.add(itemIt.next());
			}
			doMapping(items, workers);
      System.gc();
      numProcessed += bufSize;
      //System.out.println("[Mapper] done processing " + numProcessed);
		}
		return workers;
	}

	public Object getNumWorkers() {
		return numWorkers;
	}


	public static void main(String[] args) {
		class MyMapper extends MapWorker {
			public void map(Integer item) {
				System.out.println("\tProcessing " + item);
			}	
		}
		MapWorkerFactory factory = new MapWorkerFactory() {
			public MapWorker newMapWorker() {
				return new MyMapper();
			}			
		};
		Mapper mapper = new Mapper(factory);
		List items = new ArrayList();
		for (int i=0; i < 10000; ++i) {
			items.add(i);
		}
		mapper.doMapping(items.iterator(),10);
	}	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy