All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dell.doradus.olap.merge.Merger Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2014 Dell, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dell.doradus.olap.merge;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.dell.doradus.common.ApplicationDefinition;
import com.dell.doradus.common.FieldDefinition;
import com.dell.doradus.common.FieldType;
import com.dell.doradus.common.TableDefinition;
import com.dell.doradus.core.ServerConfig;
import com.dell.doradus.olap.io.VDirectory;
import com.dell.doradus.olap.store.FieldSearcher;
import com.dell.doradus.olap.store.FieldWriter;
import com.dell.doradus.olap.store.FieldWriterSV;
import com.dell.doradus.olap.store.IdReader;
import com.dell.doradus.olap.store.IdWriter;
import com.dell.doradus.olap.store.InverseLinkWriter;
import com.dell.doradus.olap.store.NumSearcherMV;
import com.dell.doradus.olap.store.NumWriter;
import com.dell.doradus.olap.store.NumWriterMV;
import com.dell.doradus.olap.store.SegmentStats;
import com.dell.doradus.olap.store.ValueReader;
import com.dell.doradus.olap.store.ValueWriter;
import com.dell.doradus.search.util.HeapList;
import com.dell.doradus.utilities.Timer;

public class Merger {
    private static Logger LOG = LoggerFactory.getLogger("Olap.Merger");
    private static ExecutorService executor = ServerConfig.getInstance().olap_merge_threads == 0 ? null :
    		Executors.newFixedThreadPool(ServerConfig.getInstance().olap_merge_threads);
    
    private ApplicationDefinition appDef;
    private List sources;
    private VDirectory destination;
    private SegmentStats stats;
    private Map remaps = new HashMap();
    private Object m_syncRoot = new Object();
	
	public static void mergeApplication(ApplicationDefinition appDef, List sources, VDirectory destination) {
		Merger m = new Merger(appDef, sources, destination);
		if(executor != null) m.mergeApplicationWithThreadPool();
		else m.mergeApplication();
	}

	public Merger(ApplicationDefinition appDef, List sources, VDirectory destination) {
		this.appDef = appDef;
		this.sources = sources;
		this.destination = destination;
	}

	public void mergeApplicationWithThreadPool() {
		try {
			Timer timer = new Timer();
			LOG.debug("Merging application {}", appDef.getAppName());
			stats = new SegmentStats();
			List> futures = new ArrayList<>();
			for(TableDefinition tableDef : appDef.getTableDefinitions().values()) {
				final TableDefinition fTableDef = tableDef;
				final String table = tableDef.getTableName();
				futures.add(executor.submit(new Runnable() {
					@Override public void run() {
						LOG.debug("   Merging {}", table);
						mergeDocs(fTableDef);
					}}));
			}
			for(Future f: futures) f.get();
			futures.clear();
			for(TableDefinition tableDef : appDef.getTableDefinitions().values()) {
				LOG.debug("   Merging fields of table {}", tableDef.getTableName());
				for(FieldDefinition fieldDef : tableDef.getFieldDefinitions()) {
					final FieldDefinition fFieldDef = fieldDef;
					futures.add(executor.submit(new Runnable() {
						@Override public void run() {
							LOG.debug("      Merging {}/{} ({})", new Object[] {fFieldDef.getTableName(), fFieldDef.getName(), fFieldDef.getType()});
							mergeField(fFieldDef);
						}}));
				}
			}
			for(Future f: futures) f.get();
			futures.clear();
			stats.totalStoreSize = destination.totalLength(false);
			stats.save(destination);
			LOG.debug("Application {} merged in {}", appDef.getAppName(), timer);
		}catch(ExecutionException ee) {
			throw new RuntimeException(ee);
		}catch(InterruptedException ee) {
			throw new RuntimeException(ee);
		}
	}
	
	
	
	public void mergeApplication() {
		Timer timer = new Timer();
		LOG.debug("Merging application {}", appDef.getAppName());
		stats = new SegmentStats();
		for(TableDefinition tableDef : appDef.getTableDefinitions().values()) {
			String table = tableDef.getTableName();
			LOG.debug("   Merging {}", table);
			mergeDocs(tableDef);
		}
		for(TableDefinition tableDef : appDef.getTableDefinitions().values()) {
			String table = tableDef.getTableName();
			LOG.debug("   Merging fields of table {}", table);
			for(FieldDefinition fieldDef : tableDef.getFieldDefinitions()) {
				LOG.debug("      Merging {}/{} ({})", new Object[] {table, fieldDef.getName(), fieldDef.getType()});
				mergeField(fieldDef);
			}
		}
		
		stats.totalStoreSize = destination.totalLength(false);
		stats.save(destination);
		LOG.debug("Application {} merged in {}", appDef.getAppName(), timer);
	}
	
    private void mergeDocs(TableDefinition tableDef) {
    	String table = tableDef.getTableName();
        Remap remap = new Remap(sources.size());
        IdWriter id_writer = new IdWriter(destination, table);
        
        HeapList heap = new HeapList(sources.size() - 1);
        IxDoc current = null;
        for(int i = 0; i < sources.size(); i++) {
            current = new IxDoc(i, new IdReader(sources.get(i), table));
            current.next();
            current = heap.AddEx(current);
        }

        while (current.id != null)
        {
        	int dstDoc = id_writer.add(current.id);
            remap.set(current.segment, current.reader.cur_number, dstDoc);
            if(current.reader.is_deleted) {
            	remap.setDeleted(current.segment, current.reader.cur_number, dstDoc);
            	id_writer.removeLastId(current.id);
            }
            current.next();
            current = heap.AddEx(current);
        }
        
        remap.shrink();
        id_writer.close();
        synchronized (m_syncRoot) {
            stats.addTable(table, id_writer.size());
            remaps.put(table, remap);
		}
    }
    
	private void mergeField(FieldDefinition fieldDef) {
		if(fieldDef.getType() == FieldType.TEXT || fieldDef.getType() == FieldType.BINARY) {
			mergeTextField(fieldDef);
		} else if(fieldDef.isLinkField()) {
			mergeLinkField(fieldDef);
		} else if(NumSearcherMV.isNumericType(fieldDef.getType())) {
			mergeNumField(fieldDef);
		} else if(fieldDef.isGroupField() || fieldDef.isXLinkField()) {
			// do nothing
		} else throw new RuntimeException("Unsupported field type: " + fieldDef.getType());
	}
	
    private void mergeNumField(FieldDefinition fieldDef)
    {
		String table = fieldDef.getTableName();
		String field = fieldDef.getName();
		Remap remap = remaps.get(table);
		if(remap.dstSize() == 0) return;
		
        if(fieldDef.isCollection()) {
            NumWriterMV num_writer = new NumWriterMV(remap.dstSize());
	        
	        HeapList heap = new HeapList(sources.size() - 1);
	        IxNum current = null;
	        for(int i = 0; i < sources.size(); i++) {
	            current = new IxNum(i, remap, new NumSearcherMV(sources.get(i), table, field));
	            current.next();
	            current = heap.AddEx(current);
	        }
	
	        while (current.doc != Integer.MAX_VALUE)
	        {
	        	num_writer.add(current.doc, current.num);
	            current.next();
	            current = heap.AddEx(current);
	        }
	        
	        num_writer.close(destination, table, field);
	        stats.addNumField(fieldDef, num_writer);
        }
        else {
            NumWriter num_writer = new NumWriter(remap.dstSize());
            
            for(int i = 0; i < sources.size(); i++) {
            	NumSearcherMV num_searcher = new NumSearcherMV(sources.get(i), table, field);
            	for(int j = 0; j < remap.size(i); j++) {
            		int doc = remap.get(i, j);
            		if(doc < 0) continue;
            		if(num_searcher.sv_isNull(j)) continue;
            		long d = num_searcher.sv_get(j);
            		num_writer.add(doc, d); 
            	}
            }
            
            num_writer.close(destination, table, field);
            synchronized (m_syncRoot) {
                stats.addNumField(fieldDef, num_writer);
    		}
        }
        
    }
	
    private void mergeTextField(FieldDefinition fieldDef) {
		String table = fieldDef.getTableName();
		String field = fieldDef.getName();
        Remap docRemap = remaps.get(table);
		if(docRemap.dstSize() == 0) return;
		Remap valRemap = new Remap(sources.size());
		
		{
	        ValueWriter value_writer = new ValueWriter(destination, table, field);
	        
	        HeapList heap = new HeapList(sources.size() - 1);
	        IxTerm current = null;
	        for(int i = 0; i < sources.size(); i++) {
	            current = new IxTerm(i, new ValueReader(sources.get(i), table, field));
	            current.next();
	            current = heap.AddEx(current);
	        }
	
	        while (current.term != null)
	        {
	        	int dstVal = value_writer.add(current.term, current.orig);
	        	valRemap.set(current.segment, current.reader.cur_number, dstVal);
	            current.next();
	            current = heap.AddEx(current);
	        }
	        
	        value_writer.close();
		}
		
        if(fieldDef.isCollection()) {
	        FieldWriter field_writer = new FieldWriter(docRemap.dstSize());
	        
	        HeapList heap = new HeapList(sources.size() - 1);
	        IxVal current = null;
	        for(int i = 0; i < sources.size(); i++) {
	            current = new IxVal(i, docRemap, valRemap, new FieldSearcher(sources.get(i), table, field));
	            current.next();
	            current = heap.AddEx(current);
	        }
	
	        while (current.doc != Integer.MAX_VALUE)
	        {
	        	field_writer.add(current.doc, current.val);
	            current.next();
	            current = heap.AddEx(current);
	        }
	        
	        field_writer.close(destination, table, field);
            synchronized (this) {
    	        stats.addTextField(fieldDef, field_writer);
    		}
        }
        else {
	        FieldWriterSV field_writer = new FieldWriterSV(docRemap.dstSize());
	        
            for(int i = 0; i < sources.size(); i++) {
            	FieldSearcher field_searcher = new FieldSearcher(sources.get(i), table, field);
            	for(int j = 0; j < docRemap.size(i); j++) {
            		int doc = docRemap.get(i, j);
            		if(doc < 0) continue;
            		int d = field_searcher.sv_get(j);
            		if(d < 0) continue;
            		field_writer.set(doc, valRemap.get(i, d));
            	}
            }
	        
	        field_writer.close(destination, table, field);
            synchronized (this) {
    	        stats.addTextField(fieldDef, field_writer);
    		}
        }
    }

    private void mergeLinkField(FieldDefinition fieldDef)
    {
		String table = fieldDef.getTableName();
		String link = fieldDef.getName();
		
        Remap docRemap = remaps.get(table);
        Remap valRemap = remaps.get(fieldDef.getLinkExtent());
		if(docRemap.dstSize() == 0 || valRemap.dstSize() == 0) return;

		if(InverseLinkWriter.shouldWriteInverse(fieldDef)) {
		    InverseLinkWriter.writeInverse(destination, fieldDef, stats);
		}
        //all links are multi-valued because of referential integrity and idempotent updates
        //if(fieldDef.isCollection())
        else
        {
	        FieldWriter field_writer = new FieldWriter(docRemap.dstSize());
	        
	        HeapList heap = new HeapList(sources.size() - 1);
	        IxVal current = null;
	        for(int i = 0; i < sources.size(); i++) {
	            current = new IxVal(i, docRemap, valRemap, new FieldSearcher(sources.get(i), table, link));
	            current.next();
	            current = heap.AddEx(current);
	        }
	
	        while (current.doc != Integer.MAX_VALUE)
	        {
	        	field_writer.add(current.doc, current.val);
	            current.next();
	            current = heap.AddEx(current);
	        }
	        
	        field_writer.close(destination, table, link);
            synchronized (this) {
    	        stats.addLinkField(fieldDef, field_writer);
    		}
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy