![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.mapper.Mapper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.mapper;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Utility class for parallelizing the processing of a collection of items. To use it you must
 * extend {@code MapWorker<Item>}, which will process an instance of {@code Item}. When you call
 * {@code doMapping(Iterator<Item>, bufSize)}, we take {@code bufSize} elements out of the iterator
 * and distribute the processing of those items, then take {@code bufSize} more, and so on.
 *
 * @author aria42
 *
 * @param <Item> the type of item being processed
 */
public class Mapper
- {
private int numWorkers ;
private MapWorkerFactory
- factory;
public Mapper(MapWorkerFactory
- factory) {
this.factory = factory;
this.numWorkers = Runtime.getRuntime().availableProcessors();
}
public Mapper(final Class c) {
this(new MapWorkerFactory
- () {
public MapWorker
- newMapWorker() {
try {
return (MapWorker
- ) c.newInstance();
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
});
}
public void setNumWorkers(int numWorkers) {
this.numWorkers = numWorkers;
}
public List
> doMapping(List- items) {
List
> workers = new ArrayList>();
for (int i=0; i < numWorkers; ++i) {
MapWorker- worker = factory.newMapWorker();
workers.add(worker);
}
doMapping(items, workers);
return workers;
}
private void doMapping(List
- items, List
> workers) {
ExecutorService executor = Executors.newFixedThreadPool(workers.size());
for (int i=0; i < workers.size(); ++i) {
int start = (int) ((i/(double) workers.size()) * items.size());
int end = (int) (((i+1)/(double) workers.size()) * items.size());
List- localItems = items.subList(start, end);
MapWorker
- worker = workers.get(i);
worker.setItems(localItems);
executor.execute(worker);
}
execute(executor);
for (MapWorker
- worker : workers) {
worker.reduce();
}
}
private void execute(ExecutorService executor) {
executor.shutdown();
try {
executor.awaitTermination(10000, TimeUnit.SECONDS);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
public List
> doMapping(Iterator- itemIt) {
return doMapping(itemIt, 10000);
}
public List
> doMapping(Iterator- itemIt, int bufSize) {
List
> workers = new ArrayList>();
int numProcessed = 0;
for (int i=0; i < numWorkers; ++i) {
MapWorker- worker = factory.newMapWorker();
workers.add(worker);
}
while (itemIt.hasNext()) {
List
- items = new ArrayList
- ();
for (int i=0; i < bufSize; ++i) {
if (!itemIt.hasNext()) break;
items.add(itemIt.next());
}
doMapping(items, workers);
System.gc();
numProcessed += bufSize;
//System.out.println("[Mapper] done processing " + numProcessed);
}
return workers;
}
public Object getNumWorkers() {
return numWorkers;
}
public static void main(String[] args) {
class MyMapper extends MapWorker
{
public void map(Integer item) {
System.out.println("\tProcessing " + item);
}
}
MapWorkerFactory factory = new MapWorkerFactory() {
public MapWorker newMapWorker() {
return new MyMapper();
}
};
Mapper mapper = new Mapper(factory);
List items = new ArrayList();
for (int i=0; i < 10000; ++i) {
items.add(i);
}
mapper.doMapping(items.iterator(),10);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy