All downloads are free. Search and download functionality uses the official Maven repository.

com.netflix.search.query.engine.BaseIndexer Maven / Gradle / Ivy

/**
 * Copyright 2016 Netflix, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.netflix.search.query.engine;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Sets;
import com.netflix.search.query.Properties;
import com.netflix.search.query.utils.StringUtils;
import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;

public abstract class BaseIndexer {
    public static final Logger logger = LoggerFactory.getLogger(BaseIndexer.class);

    public static final String ENCODING = "UTF-8";
    private static final int BUFFER_SIZE = 1 << 16; // 64K
    private Client client = Client.create();

    private String inputFileName=null;
    private String testName=null;

    public BaseIndexer(String inputFileName, String testName) {
        this.inputFileName = inputFileName;
        this.testName = testName;
    }
    
    public void indexData(List languages) throws Throwable
    {
        long start = System.currentTimeMillis();
        List> docs = createDocs(languages);
        update(docs);
        commit();
        logger.info("Indexing took: " + (System.currentTimeMillis() - start) + " ms");
    }

    protected List> createDocs(List languages) throws Throwable
    {
        List> docs = new ArrayList>();

        InputStream is = new BufferedInputStream(new FileInputStream(inputFileName), BUFFER_SIZE);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, ENCODING), BUFFER_SIZE);
        String lineString = null;
        while ((lineString = reader.readLine()) != null) {
            String[] line = lineString.split(Properties.inputDelimiter.get());
			if (lineString.startsWith(Properties.idField.get() + Properties.inputDelimiter.get()))
				continue;
            if (line.length < 3)
                logger.error("Bad data: " + lineString);
            else
                docs.add(createDoc(line[0], line[1], line[2], line[3], languages));
        }
        reader.close();
        is.close();
        return docs;
    }

    public Map createDoc(String id, String english, String local, String altTitle, List languages)
    {
        Map doc = new HashMap();
        doc.put(Properties.idField.get(), StringUtils.createIdUsingTestName(id, testName));
        
        for(String requiredField:Properties.requiredNumericFields.get())
        	doc.put(requiredField, 1);

        for(String requiredField:Properties.requiredStringFields.get())
        	doc.put(requiredField, "query_testing_default");

        if (local != null && local.length() > 0) {
            for (String language: languages){
            	for(String fieldName: Properties.titleFields.get()) {
					doc.put(fieldName + "_" + language, addValue(doc, language, fieldName, local));
					//TODO: bug?
					if (Properties.languagesRequiringTransliterationFromEnglish.get().contains(language) && fieldName.equals(Properties.transliterationFieldName.get()))
						doc.put(fieldName + "_" + language, english);
				}
            }
        }
        if (english != null && english.length() > 0) {
        	for(String fieldName: Properties.titleFields.get())
				doc.put(fieldName + "_en", english);
        }
		if (altTitle != null && altTitle.length() > 0) {
			for (String language : languages) {
				if (Properties.languagesRequiringAdditionalField.get().contains(language))
					for (String fieldName : Properties.titleAkaFields.get()) {
						doc.put(fieldName + "_" + language, addValue(doc, language, fieldName, altTitle));
					}
			}
		}

        doc.put(Properties.docTypeFieldName.get(), testName);
        
        return doc;
    }

	private Set addValue(Map doc, String language, String fieldName, String title)
	{
		@SuppressWarnings("unchecked")
		Set existingValues = (Set)doc.get(fieldName + "_" + language);
		if(existingValues==null) existingValues = Sets.newHashSet();
		existingValues.add(title);
		return existingValues;
	}

	private boolean update(List> docs) throws IOException {
		for (Map doc : docs) {
			try {
				addDoc(doc);
			} catch (Throwable e) {
				logger.error("bad doc" + doc);
				throw new RuntimeException(e);
			}
		}
		return true;
	}

	void addDoc(Map doc)
	{
		JsonNode node = new ObjectMapper().valueToTree(doc);
		StringBuilder jsonString = getJsonStringOfDoc(node);

		WebResource webResource = client.resource(getUrlForAddingDoc(doc));
		ClientResponse response = webResource.type("application/json").post(ClientResponse.class, jsonString.toString());
		if (response == null || (response.getStatus() != 201 && response.getStatus() != 200))
		{
			throw new RuntimeException("Failed : HTTP error code on adding a doc: " + response.getStatus());
		}
		response.close();
	}

	public StringBuilder getJsonStringOfDoc(JsonNode node)
	{
		StringBuilder jsonString = new StringBuilder("[");
		nodeAsString(node, jsonString);
		jsonString.append("]");
		return jsonString;
	}

	public void nodeAsString(JsonNode node, StringBuilder jsonString)
	{
		try
		{
			jsonString.append(new ObjectMapper().writeValueAsString(node));
		} catch (JsonProcessingException e)
		{
			logger.error("Error trying to generate a string from a json node", e);
		}
	}

	void commit()
	{
		WebResource webResource = client.resource(getUrlForCommitting());
		ClientResponse response = webResource.get(ClientResponse.class);
		if (response == null || (response.getStatus() != 201 && response.getStatus() != 200))
		{
			throw new RuntimeException("Failed : HTTP error code on commit: " + response.getStatus());
		}
		response.close();
	}
	
    public String getServerUrl(){
    	return "http://" + Properties.engineHost.get() + ":" + Properties.enginePort.get() + "/" + Properties.engineServlet.get() + "/" + Properties.engineIndexName.get();
    }
	
	public abstract String getUrlForAddingDoc(Map doc);
	public abstract String getUrlForCommitting();


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy