All downloads are free. Search and download functionality uses the official Maven repository.

com.senseidb.indexing.hadoop.reduce.SenseiReducer Maven / Gradle / Ivy

There is a newer version: 2.0.1
Show newest version
package com.senseidb.indexing.hadoop.reduce;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Closeable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;

import com.senseidb.indexing.hadoop.keyvalueformat.IntermediateForm;
import com.senseidb.indexing.hadoop.keyvalueformat.Shard;
import com.senseidb.indexing.hadoop.util.MRConfig;

/**
 * Reducer that builds one index shard per key.
 *
 * <p>For every {@link Shard} key it creates a {@link ShardWriter} on a
 * time-stamped temporary directory below the configured temp dir, feeds all
 * {@link IntermediateForm} values of that key into the writer, optimizes and
 * closes the writer, and finally emits {@code (key, DONE)}.
 */
public class SenseiReducer extends MapReduceBase implements
        Reducer<Shard, IntermediateForm, Shard, Text> {

    private static final Logger logger = Logger.getLogger(SenseiReducer.class);

    /** Value emitted for a shard once its writer has been closed. */
    static final Text DONE = new Text("done");

    /** Pause between two status heartbeats while a shard writer is being closed. */
    private static final long HEARTBEAT_INTERVAL_MS = 1000L;

    /** Job configuration captured in {@link #configure(JobConf)}. */
    private Configuration iconf;

    /** Normalized value of {@code MRConfig.TEMP_DIR}; parent of the per-shard temp dirs. */
    private String mapredTempDir;

    /**
     * Writes all intermediate forms of one shard and reports the shard as done.
     *
     * @param key      the shard being built
     * @param values   the intermediate forms belonging to {@code key}
     * @param output   receives {@code (key, DONE)} after the writer is closed
     * @param reporter used for progress and status heartbeats
     * @throws IOException if the file system, the shard writer or the collector fails
     */
    @Override
    public void reduce(Shard key, Iterator<IntermediateForm> values,
                       OutputCollector<Shard, Text> output,
                       Reporter reporter) throws IOException {

        logger.info("Construct a shard writer for " + key);
        FileSystem fs = FileSystem.get(iconf);
        //debug:
        logger.info("filesystem is: " + fs.getName());
        // The timestamp keeps the temp directories of repeated attempts apart.
        String temp = mapredTempDir + Path.SEPARATOR + "shard_" + key.toFlatString() + "_" + System.currentTimeMillis();
        logger.info("mapredTempDir is: " + mapredTempDir);
        ShardWriter writer = new ShardWriter(fs, key, temp, iconf);

        // update the shard
        // NOTE(review): if process() throws, the writer is left unclosed. Whether
        // closing a partially written shard is safe depends on ShardWriter.close(),
        // which is not visible here -- confirm before adding a finally block.
        while (values.hasNext()) {
            writer.process(values.next());
            reporter.progress();
        }

        // close the shard
        closeWriter(writer, reporter);
        logger.info("Closed the shard writer for " + key + ", writer = " + writer);

        output.collect(key, DONE);
    }

    /**
     * Optimizes and closes {@code writer} while a background thread keeps
     * setting the task status, so a long-running close still reports activity.
     */
    private static void closeWriter(ShardWriter writer, Reporter reporter) throws IOException {
        CloseHeartbeat heartbeat = new CloseHeartbeat(reporter);
        try {
            heartbeat.start();
            writer.optimize();  // optimize after all the docs have been added
            writer.close();
        } finally {
            // Stop the heartbeat right away instead of letting it sleep out its interval.
            heartbeat.finish();
        }
    }

    /** Background thread that repeatedly sets the status "closing" until {@link #finish()} is called. */
    private static final class CloseHeartbeat extends Thread {
        private final Reporter reporter;
        private volatile boolean finished = false;

        CloseHeartbeat(Reporter reporter) {
            super("shard-close-heartbeat");
            this.reporter = reporter;
            // A stuck heartbeat must never keep the JVM alive.
            setDaemon(true);
        }

        @Override
        public void run() {
            while (!finished) {
                try {
                    reporter.setStatus("closing");
                    Thread.sleep(HEARTBEAT_INTERVAL_MS);
                } catch (InterruptedException e) {
                    // Woken up (normally by finish()); the loop condition decides whether to stop.
                    continue;
                } catch (Throwable e) {
                    // Heartbeats are best effort; give up quietly if the reporter fails.
                    return;
                }
            }
        }

        /** Marks the close as finished and wakes the thread so it exits promptly. */
        void finish() {
            finished = true;
            interrupt();
        }
    }

    /**
     * Removes the temp directory (accessed through {@link File}) and everything below it.
     *
     * @throws IOException declared by the overridden method; not thrown by this implementation
     */
    @Override
    public void close() throws IOException {
        if (mapredTempDir != null) {
            deleteDir(new File(mapredTempDir));
        }
    }

    /**
     * Recursively deletes {@code file}. Does nothing when {@code file} is
     * {@code null} or does not exist. Entries that cannot be deleted are logged
     * and skipped.
     *
     * @param file the file or directory to delete; may be {@code null}
     */
    static void deleteDir(File file) {
        if (file == null || !file.exists()) {
            return;
        }
        // listFiles() returns null for a plain file or when the directory cannot be read.
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                if (child.isDirectory()) {
                    deleteDir(child);
                } else if (!child.delete()) {
                    logger.warn("Could not delete " + child);
                }
            }
        }
        if (!file.delete()) {
            logger.warn("Could not delete " + file);
        }
    }

    /**
     * Captures the job configuration and reads the normalized temp directory
     * from {@code MRConfig.TEMP_DIR}.
     *
     * @param job the job configuration handed in by the framework
     */
    @Override
    public void configure(JobConf job) {
        iconf = job;
        mapredTempDir = iconf.get(MRConfig.TEMP_DIR);
        mapredTempDir = Shard.normalizePath(mapredTempDir);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy