// All downloads are free. Search and download functionality uses the official Maven repository.
//
// de.unibonn.iai.eis.luzzu.web.extensions.LODLaundromat Maven / Gradle / Ivy
//
// The newest version!
package de.unibonn.iai.eis.luzzu.web.extensions;

import java.io.IOException;
import java.math.RoundingMode;
import java.net.URL;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.google.common.base.Charsets;
import com.google.common.io.Resources;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;

import de.unibonn.iai.eis.luzzu.operations.ranking.DatasetLoader;
import de.unibonn.iai.eis.luzzu.semantics.configuration.InternalModelConf;
import de.unibonn.iai.eis.luzzu.web.visualise.Data;
import de.unibonn.iai.eis.luzzu.web.visualise.datatypes.DatasetObject;
import de.unibonn.iai.eis.luzzu.web.visualise.datatypes.MetricObject;

/**
 * Static helpers that query the internal quality-metadata dataset and return
 * hand-assembled JSON strings.
 *
 * <p>Both public methods load the SPARQL template
 * {@code GetMetricObservationValue.sparql} from the classpath, substitute its
 * {@code %metric%} and {@code %dataset%} placeholders, and run the resulting
 * SELECT query against a union of every graph held by the internal dataset.
 */
public class LODLaundromat {
	static final Logger logger = LoggerFactory.getLogger(LODLaundromat.class);

	/** Classpath resource holding the SELECT template with the %metric% / %dataset% placeholders. */
	private static final String QUERY_RESOURCE = "GetMetricObservationValue.sparql";

	/** Name of the graph under which the internal configuration model is registered. */
	private static final String CONFIG_GRAPH = "urn:InternalModelConfig";

	/** Maximum number of datasets picked by {@link #getSampleForLodLaundromat(String)}. */
	private static final int SAMPLE_SIZE = 30;

	/** Characters the SPARQL grammar forbids inside an {@code <...>} IRI reference (besides controls and space). */
	private static final String ILLEGAL_IRI_CHARS = "<>\"{}|^`\\";

	private static final Dataset d = DatasetLoader.getInstance().getInternalDataset();

	// Only the keys (dataset names) and the size are used in this class, so the value type is left open.
	private static final Map<String, ?> graphs = DatasetLoader.getInstance().getAllGraphs();

	/**
	 * Computes, for each of the two hard-coded metrics, the mean of all non-NaN
	 * observation values (scaled by 100) and returns them together with the
	 * number of known graphs.
	 *
	 * @return a JSON object of the form
	 *         {@code {"totalAssessedFiles" : N,"averageCorrectLanguageTag" : X,"averageCompatibleDatatype" : Y}};
	 *         an average is {@code null} when no usable observation was found
	 * @throws IllegalStateException if the SPARQL template cannot be read
	 */
	public static String getQualityStats(){
		d.addNamedModel(CONFIG_GRAPH, InternalModelConf.getFlatModel());

		String selectQuery = loadSelectQuery();

		// NOTE(review): both metric entries are empty strings in this copy of the file, so
		// %metric% is replaced with nothing. Presumably the metric IRIs were lost somewhere;
		// confirm against the upstream source before relying on these averages.
		String[] metrics = new String[]{"",""};
		String[] jsonString = new String[]{"averageCorrectLanguageTag","averageCompatibleDatatype"};

		// Locale.ROOT pins '.' as the decimal separator; the default locale could emit ',' and break the JSON.
		DecimalFormat df = new DecimalFormat("#.###", DecimalFormatSymbols.getInstance(Locale.ROOT));
		df.setRoundingMode(RoundingMode.CEILING);

		// The dataset is not modified while querying, so one merged model serves every query below.
		Model flatModel = getFlatModel();

		StringBuilder json = new StringBuilder("{");
		json.append("\"totalAssessedFiles\" : ").append(graphs.size());

		for (int c = 0; c < metrics.length; c++){
			logger.debug("Aggregating {}", jsonString[c]);

			// Accumulators are per metric; sharing them would fold earlier metrics into later averages.
			double sum = 0.0;
			int counter = 0;

			for (String ds : graphs.keySet()){
				String sel = selectQuery;
				sel = sel.replace("%metric%", metrics[c]);
				// NOTE(review): %dataset% is replaced with an empty string, so the query does not
				// depend on ds and is simply repeated once per graph - confirm this is intended.
				sel = sel.replace("%dataset%", "");

				QueryExecution exec = QueryExecutionFactory.create(QueryFactory.create(sel), flatModel);
				try {
					ResultSet set = exec.execSelect();
					while (set.hasNext()){
						QuerySolution sol = set.next();
						double val = sol.get("value").asLiteral().getDouble() * 100.0;

						if (!Double.isNaN(val)){
							sum += val;
							counter++;
						}

						if (val < 0.4){
							logger.debug("Low {} value {} for dataset {}", jsonString[c], val, ds);
						}
					}
				} finally {
					exec.close();
				}
			}

			json.append(",\"").append(jsonString[c]).append("\" : ");
			// Without observations the mean is undefined; emit JSON null instead of a NaN token.
			json.append(counter == 0 ? "null" : df.format(sum / counter));
		}

		json.append("}");

		return json.toString();
	}

	/**
	 * Picks up to {@value #SAMPLE_SIZE} datasets at random and returns the
	 * observation values (scaled by 100) found for the given metric.
	 *
	 * <p>Datasets are selected by their position in the iteration order of the
	 * graph map. When a selected dataset yields a NaN value, the dataset that
	 * follows it is selected as a replacement.
	 *
	 * @param metric IRI of the metric, without the surrounding angle brackets
	 * @return a JSON object of the form {@code { "datasets" : [ ... ]}}; the array is
	 *         empty when nothing could be sampled
	 * @throws IllegalArgumentException if {@code metric} is {@code null} or contains
	 *         characters that are illegal inside a SPARQL IRI reference
	 * @throws IllegalStateException if the SPARQL template cannot be read
	 */
	public static String getSampleForLodLaundromat(String metric){
		// The metric is spliced into the query text, so reject anything that could escape the <...> reference.
		checkIriRef(metric);

		d.addNamedModel(CONFIG_GRAPH, InternalModelConf.getFlatModel());

		String selectQuery = loadSelectQuery();

		// Randomly choose distinct positions; capped at the population size so the loop always terminates.
		// TODO: reservoir sampling
		int population = graphs.size();
		int target = Math.min(SAMPLE_SIZE, population);
		Set<Integer> randomNumbers = new HashSet<Integer>();
		Random r = new Random();
		while (randomNumbers.size() < target){
			randomNumbers.add(r.nextInt(population));
		}

		Set<DatasetObject> dsObjects = new HashSet<DatasetObject>();
		Model flatModel = getFlatModel();

		int index = -1;
		for (String ds : graphs.keySet()){
			index++;
			if (!randomNumbers.contains(index)) {
				continue;
			}

			String sel = selectQuery;
			sel = sel.replace("%metric%", "<"+metric+">");
			// NOTE(review): %dataset% is replaced with an empty string, so the query does not
			// depend on ds - confirm this is intended.
			sel = sel.replace("%dataset%", "");

			QueryExecution exec = QueryExecutionFactory.create(QueryFactory.create(sel), flatModel);
			try {
				ResultSet set = exec.execSelect();
				while (set.hasNext()){
					QuerySolution sol = set.next();
					String metricName = sol.get("metric_name").asLiteral().toString();
					double value = sol.get("value").asLiteral().getDouble();

					if (Double.isNaN(value)){
						// No usable value here: fall back to the dataset right after this one.
						randomNumbers.add(index + 1);
						continue;
					}

					MetricObject mo = new MetricObject();
					mo.setName(metricName);
					mo.setUri(metric);
					mo.setLatestValue(value * 100.0);

					DatasetObject dso = new DatasetObject();
					dso.setName(ds);
					dso.getMetrics().add(mo);

					dsObjects.add(dso);
				}
			} finally {
				exec.close();
			}
		}

		ObjectWriter ow = new ObjectMapper().writer().withDefaultPrettyPrinter();
		StringBuilder json = new StringBuilder("{ \"datasets\" : [");
		boolean first = true;
		for (DatasetObject dso : dsObjects){
			String serialised;
			try {
				serialised = ow.writeValueAsString(dso);
			} catch (JsonProcessingException e) {
				logger.error("Error transforming to json : {}", e.getMessage(), e);
				continue;
			}
			// Separator is written before each element but the first, so an empty set still yields "[]".
			if (!first) {
				json.append(',');
			}
			json.append(serialised);
			first = false;
		}
		json.append("]}");

		return json.toString();
	}

	/**
	 * Reads the SPARQL SELECT template from the classpath.
	 *
	 * @return the template text
	 * @throws IllegalStateException if the resource cannot be read
	 */
	private static String loadSelectQuery() {
		URL url = Resources.getResource(QUERY_RESOURCE);
		try {
			return Resources.toString(url, Charsets.UTF_8);
		} catch (IOException e) {
			// An empty query would only fail later with an unrelated parse error; fail here with the cause.
			throw new IllegalStateException("Unable to read SPARQL template " + QUERY_RESOURCE, e);
		}
	}

	/**
	 * Verifies that the given text can be wrapped in angle brackets to form a
	 * SPARQL IRI reference.
	 *
	 * @param iri the candidate IRI text
	 * @throws IllegalArgumentException if {@code iri} is {@code null} or contains a
	 *         control character, a space, or one of {@code < > " { } | ^ ` \}
	 */
	private static void checkIriRef(String iri) {
		if (iri == null) {
			throw new IllegalArgumentException("metric must not be null");
		}
		for (int i = 0; i < iri.length(); i++) {
			char ch = iri.charAt(i);
			if (ch <= ' ' || ILLEGAL_IRI_CHARS.indexOf(ch) >= 0) {
				throw new IllegalArgumentException("metric is not a valid IRI: " + iri);
			}
		}
	}

	/**
	 * Builds a new in-memory model containing the statements of every named
	 * graph of the internal dataset plus those of its default graph.
	 *
	 * @return the merged model
	 */
	private static Model getFlatModel() {
		Model m = ModelFactory.createDefaultModel();

		Iterator<String> iter = d.listNames();
		while (iter.hasNext()){
			m.add(d.getNamedModel(iter.next()));
		}
		m.add(d.getDefaultModel());

		return m;
	}

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy