All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.jsonldjava.core.NormalizeUtils Maven / Gradle / Ivy

There is a newer version: 0.13.6
Show newest version
package com.github.jsonldjava.core;

import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static com.github.jsonldjava.core.JSONLDUtils.*;
import static com.github.jsonldjava.core.RDFDatasetUtils.*;

class NormalizeUtils {
	
	/** Namer that hands out the canonical blank node names. */
	private final UniqueNamer namer;
	/** Per-bnode info keyed by bnode id; entries are read through their "quads" and "hash" keys. */
	private final Map bnodes;
	/** The quads to normalize; each element is a map read through "subject", "object" and "name". */
	private final List quads;
	/** Processing options; only {@code options.format} is consulted by this class. */
	private final Options options;

	/**
	 * Creates a normalizer over the given quad set.
	 *
	 * The arguments are stored by reference (no copies are made), so the
	 * naming done during normalization is visible to the caller through
	 * the same objects.
	 *
	 * @param quads the quads to normalize.
	 * @param bnodes the mapping of bnode ids to their quad info.
	 * @param namer the canonical bnode namer.
	 * @param options the processing options (output format).
	 */
	public NormalizeUtils(List quads, Map bnodes, UniqueNamer namer, Options options) {
		this.options = options;
		this.quads = quads;
		this.bnodes = bnodes;
		this.namer = namer;
	}

	// generates unique and duplicate hashes for bnodes
	public Object hashBlankNodes(Collection unnamed_) throws JSONLDProcessingError {
		List unnamed = new ArrayList(unnamed_);
		List nextUnnamed = new ArrayList();
		Map> duplicates = new LinkedHashMap>();
		Map unique = new LinkedHashMap();
		
		// NOTE: not using the same structure as javascript here to avoid possible stack overflows
		// hash quads for each unnamed bnode
		for (int hui = 0;;hui++) {
			if (hui == unnamed.size()) {
				// done, name blank nodes
				Boolean named = false;
				List hashes = new ArrayList(unique.keySet());
				Collections.sort(hashes);
				for (String hash : hashes) {
					String bnode = unique.get(hash);
					namer.getName(bnode);
					named = true;
				}
				
				// continue to hash bnodes if a bnode was assigned a name
				if (named) {
					// this resets the initial variables, so it seems like it has to go on the stack
					// but since this is the end of the function either way, it might not have to
					//hashBlankNodes(unnamed);
					hui = -1;
					unnamed = nextUnnamed;
					nextUnnamed = new ArrayList();
					duplicates = new LinkedHashMap>();
					unique = new LinkedHashMap();
					continue;
				}
				// name the duplicate hash bnods
				else {
					// names duplicate hash bnodes
					// enumerate duplicate hash groups in sorted order
					hashes = new ArrayList(duplicates.keySet());
					Collections.sort(hashes);
					
					// process each group
					for (int pgi = 0;;pgi++) {
						if (pgi == hashes.size()) {
							// done, create JSON-LD array
							//return createArray();
							List normalized = new ArrayList();
							
							// Note: At this point all bnodes in the set of RDF quads have been
							// assigned canonical names, which have been stored in the 'namer' object.
							// Here each quad is updated by assigning each of its bnodes its new name
							// via the 'namer' object
							
							// update bnode names in each quad and serialize
							for (int cai = 0; cai < quads.size(); ++cai) {
								Map quad = (Map) quads.get(cai);
								for (String attr: new String[] { "subject", "object", "name" }) {
									if (quad.containsKey(attr)) {
										Map qa = (Map) quad.get(attr);
										if (qa != null && "blank node".equals(qa.get("type")) &&
											((String)qa.get("value")).indexOf("_:c14n") != 0) {
											qa.put("value", namer.getName((String)qa.get(("value"))));
										}
									}
								}
								normalized.add(toNQuad(quad, quad.containsKey("name") && quad.get("name") != null ? (String)((Map) quad.get("name")).get("value") : null));
							}
							
							// sort normalized output
							Collections.sort(normalized);
							
							// handle output format
							if (options.format != null) {
								if ("application/nquads".equals(options.format)) {
									String rval = "";
									for (String n : normalized) {
										rval += n;
									}
									return rval;
								} else {
									throw new JSONLDProcessingError("Unknown output format.")
										.setType(JSONLDProcessingError.Error.UNKNOWN_FORMAT)
										.setDetail("format", options.format);
								}
							}
							String rval = "";
							for (String n : normalized) {
								rval += n;
							}
							return parseNQuads(rval);
						}
						
						// name each group member
						List group = duplicates.get(hashes.get(pgi));
						List results = new ArrayList();
						for (int n = 0;;n++) {
							if (n == group.size()) {
								// name bnodes in hash order
								Collections.sort(results, new Comparator() {
									@Override
									public int compare(HashResult a, HashResult b) {
										int res = a.hash.compareTo(b.hash);
										return res;
									}
								});
								for (HashResult r : results) {
									// name all bnodes in path namer in key-entry order
									// Note: key-order is preserved in javascript
									for (String key: r.pathNamer.existing().keySet()) {
										namer.getName(key);
									}
								}
								// processGroup(i+1);
								break;
							} else {
								// skip already-named bnodes
								String bnode = group.get(n);
								if (namer.isNamed(bnode)) {
									continue;
								}
								
								// hash bnode paths
								UniqueNamer pathNamer = new UniqueNamer("_:b");
								pathNamer.getName(bnode);
								
								HashResult result = hashPaths(bnode, bnodes, namer, pathNamer);
								results.add(result);
							}
						}
					}
				}
			}
			
			// hash unnamed bnode
			String bnode = unnamed.get(hui);
			String hash = hashQuads(bnode, bnodes, namer);
			
			// store hash as unique or a duplicate
			if (duplicates.containsKey(hash)) {
				duplicates.get(hash).add(bnode);
				nextUnnamed.add(bnode);
			}
			else if (unique.containsKey(hash)) {
				List tmp = new ArrayList();
				tmp.add(unique.get(hash));
				tmp.add(bnode);
				duplicates.put(hash, tmp);
				nextUnnamed.add(unique.get(hash));
				nextUnnamed.add(bnode);
				unique.remove(hash);
			}
			else {
				unique.put(hash, bnode);
			}	
		}
	}

	/**
	 * Plain holder for the two values produced by hashPaths: the path hash
	 * and the path namer that was in effect when the hash was finished.
	 */
	private static class HashResult {
		// hex-encoded digest computed by hashPaths
		String hash;
		// path namer returned alongside the hash; callers read the names it issued
		UniqueNamer pathNamer;
	}
	
	/**
	 * Produces a hash for the paths of adjacent bnodes for a bnode,
	 * incorporating all information about its subgraph of bnodes. This
	 * method will recursively pick adjacent bnode permutations that produce the
	 * lexicographically-least 'path' serializations.
	 *
	 * @param id the ID of the bnode to hash paths for.
	 * @param bnodes the map of bnode quads.
	 * @param namer the canonical bnode namer.
	 * @param pathNamer the namer used to assign names to adjacent bnodes.
	 * @param callback(err, result) called once the operation completes.
	 */
	private static HashResult hashPaths(String id,
			Map bnodes, UniqueNamer namer,
			UniqueNamer pathNamer) {
		try {
			// create SHA-1 digest
			MessageDigest md = MessageDigest.getInstance("SHA-1");
			
			Map> groups = new LinkedHashMap>();
			List groupHashes;
			List quads = (List) ((Map) bnodes.get(id)).get("quads");
			
			for (int hpi = 0;; hpi++) {
				if (hpi == quads.size()) {
					// done , hash groups
					groupHashes = new ArrayList(groups.keySet());
					Collections.sort(groupHashes);
					for (int hgi = 0;; hgi++) {
						if (hgi == groupHashes.size()) {
							HashResult res = new HashResult();
							res.hash = encodeHex(md.digest());
							res.pathNamer = pathNamer;
							return res;
						}
						
						// digest group hash
						String groupHash = groupHashes.get(hgi);
						md.update(groupHash.getBytes("UTF-8"));
						
						// choose a path and namer from the permutations
						String chosenPath = null;
						UniqueNamer chosenNamer = null;
						Permutator permutator = new Permutator(groups.get(groupHash));
						while (true) {
							Boolean contPermutation = false;
							Boolean breakOut = false;
							List permutation = permutator.next();
							UniqueNamer pathNamerCopy = pathNamer.clone();
							
							// build adjacent path
							String path = "";
							List recurse = new ArrayList();
							for (String bnode : permutation) {
								// use canonical name if available
								if (namer.isNamed(bnode)) {
									path += namer.getName(bnode);
								}
								else {
									// recurse if bnode isn't named in the path yet
									if (!pathNamerCopy.isNamed(bnode)) {
										recurse.add(bnode);
									}
									path += pathNamerCopy.getName(bnode);
								}
								
								// skip permutation if path is already >= chosen path
								if (chosenPath != null && path.length() >= chosenPath.length() && path.compareTo(chosenPath) > 0) {
									//return nextPermutation(true);
									if (permutator.hasNext()) {
										contPermutation = true;
									} else {
										// digest chosen path and update namer
										md.update(chosenPath.getBytes("UTF-8"));
										pathNamer = chosenNamer;
										// hash the nextGroup
										breakOut = true;
									}
									break;
								}
							}
							
							// if we should do the next permutation
							if (contPermutation) continue;
							// if we should stop processing this group 
							if (breakOut) break;
							
							// does the next recursion
							for (int nrn = 0;; nrn++) {
								if (nrn == recurse.size()) {
									//return nextPermutation(false);
									if (chosenPath == null || path.compareTo(chosenPath) < 0) {
										chosenPath = path;
										chosenNamer = pathNamerCopy;
									}
									if (!permutator.hasNext()) {
										// digest chosen path and update namer
										md.update(chosenPath.getBytes("UTF-8"));
										pathNamer = chosenNamer;
										// hash the nextGroup
										breakOut = true;
									}
									break;
								}
								
								// do recursion
								String bnode = recurse.get(nrn);
								HashResult result = hashPaths(bnode, bnodes, namer, pathNamerCopy);
								path += pathNamerCopy.getName(bnode) + "<" + result.hash + ">";
								pathNamerCopy = result.pathNamer;
								
								// skip permutation if path is already >= chosen path
								if (chosenPath != null && path.length() >= chosenPath.length() && path.compareTo(chosenPath) > 0) {
									//return nextPermutation(true);
									if (!permutator.hasNext()) {
										// digest chosen path and update namer
										md.update(chosenPath.getBytes("UTF-8"));
										pathNamer = chosenNamer;
										// hash the nextGroup
										breakOut = true;
									}
									break;
								}
								// do next recursion
							}
							
							// if we should stop processing this group 
							if (breakOut) break;
						}
					}
				}
				
				// get adjacent bnode
				Map quad = (Map) quads.get(hpi);
				String bnode = getAdjacentBlankNodeName((Map) quad.get("subject"), id);
				String direction = null;
				if (bnode != null) {
					// normal property
					direction = "p";
				}
				else {
					bnode = getAdjacentBlankNodeName((Map) quad.get("object"), id);
					if (bnode != null) {
						// reverse property
						direction = "r";
					}
				}
				
				if (bnode != null) {
					// get bnode name (try canonical, path, then hash)
					String name;
					if (namer.isNamed(bnode)) {
						name = namer.getName(bnode);
					}
					else if (pathNamer.isNamed(bnode)) {
						name = pathNamer.getName(bnode);
					}
					else {
						name = hashQuads(bnode, bnodes, namer);
					}
					
					// hash direction, property, end bnode name/hash
					MessageDigest md1 = MessageDigest.getInstance("SHA-1");
					//String toHash = direction + (String) ((Map) quad.get("predicate")).get("value") + name;
					md1.update(direction.getBytes("UTF-8"));
					md1.update(((String)((Map) quad.get("predicate")).get("value")).getBytes("UTF-8"));
					md1.update(name.getBytes("UTF-8"));
					String groupHash = encodeHex(md1.digest());
					if (groups.containsKey(groupHash)) {
						groups.get(groupHash).add(bnode);
					} else {
						List tmp = new ArrayList();
						tmp.add(bnode);
						groups.put(groupHash, tmp);
					}
				}
			}
		} catch (NoSuchAlgorithmException e) {
			// TODO: i don't expect that SHA-1 is even NOT going to be available?
			// look into this further
			throw new RuntimeException(e);
		} catch (UnsupportedEncodingException e) {
			// TODO: i don't expect that UTF-8 is ever not going to be available either
			throw new RuntimeException(e);
		}
	}
	
	/**
	 * Hashes all of the quads about a blank node.
	 *
	 * @param id the ID of the bnode to hash quads for.
	 * @param bnodes the mapping of bnodes to quads.
	 * @param namer the canonical bnode namer.
	 *
	 * @return the new hash.
	 */
	private static String hashQuads(
			String id,
			Map bnodes,
			UniqueNamer namer) {
		// return cached hash
		if (((Map) bnodes.get(id)).containsKey("hash")) {
			return (String) ((Map) bnodes.get(id)).get("hash");
		}
		
		// serialize all of bnode's quads
		List> quads = (List>) ((Map) bnodes.get(id)).get("quads");
		List nquads = new ArrayList();
		for (int i = 0; i < quads.size(); ++i) {
			nquads.add(toNQuad(quads.get(i), quads.get(i).get("name") != null ? (String)((Map) quads.get(i).get("name")).get("value") : null, id));
		}
		// sort serialized quads
		Collections.sort(nquads);
		// return hashed quads
		String hash = sha1hash(nquads);
		((Map) bnodes.get(id)).put("hash", hash);
		return hash;
	}

	/**
	 * A helper method that SHA-1 hashes all the strings in a collection.
	 * The strings are fed to the digest in iteration order as UTF-8 bytes.
	 *
	 * @param nquads the strings to hash.
	 * @return the hex-encoded digest.
	 */
	private static String sha1hash(Collection<String> nquads) {
		try {
			// create SHA-1 digest
			MessageDigest md = MessageDigest.getInstance("SHA-1");
			for (String nquad : nquads) {
				md.update(nquad.getBytes("UTF-8"));
			}
			return encodeHex(md.digest());
		} catch (NoSuchAlgorithmException e) {
			// no way to recover if the digest is unavailable; keep the cause
			throw new RuntimeException(e);
		} catch (UnsupportedEncodingException e) {
			// same for the charset
			throw new RuntimeException(e);
		}
	}
	
	/**
	 * Encodes bytes as a lowercase hexadecimal string, two digits per byte.
	 *
	 * @param data the bytes to encode.
	 * @return the hex string; empty for an empty array.
	 */
	private static String encodeHex(final byte[] data) {
		final StringBuilder hex = new StringBuilder(data.length * 2);
		for (final byte b : data) {
			// mask after shifting so negative bytes do not sign-extend
			hex.append(Character.forDigit((b >> 4) & 0x0f, 16));
			hex.append(Character.forDigit(b & 0x0f, 16));
		}
		return hex.toString();
	}

	/**
	 * Extracts the name of a neighbouring blank node from an RDF quad node
	 * (subject or object). A name is only reported when the node is of type
	 * "blank node" and its value is not the given blank node ID.
	 *
	 * @param node the RDF quad node.
	 * @param id the ID of the blank node to look next to.
	 *
	 * @return the adjacent blank node name or null if none was found.
	 */
	private static String getAdjacentBlankNodeName(Map node, String id) {
		// only blank nodes can be adjacent blank nodes
		if (!"blank node".equals(node.get("type"))) {
			return null;
		}
		// the node is the bnode we started from, not a neighbour
		if (node.containsKey("value") && _equals(node.get("value"), id)) {
			return null;
		}
		return (String) node.get("value");
	}

	private static class Permutator {

		private List list;
		private boolean done;
		private Map left;
		
		public Permutator(List list) {
			this.list = (List)JSONLDUtils.clone(list);
			Collections.sort(this.list);
			this.done = false;
			this.left = new LinkedHashMap();
			for (String i : this.list) {
				this.left.put(i, true);
			}
		}
		
		/**
		 * Returns true if there is another permutation.
		 *
		 * @return true if there is another permutation, false if not.
		 */
		public boolean hasNext() {
			return !this.done;
		}
		
		/**
		 * Gets the next permutation. Call hasNext() to ensure there is another one
		 * first.
		 *
		 * @return the next permutation.
		 */
		public List next() {
			List rval = (List)JSONLDUtils.clone(this.list);
			
			// Calculate the next permutation using Steinhaus-Johnson-Trotter permutation algoritm
			
			// get largest mobile element k
			// (mobile: element is grater than the one it is looking at)
			String k = null;
			int pos = 0;
			int length = this.list.size();
			for (int i = 0; i < length; ++i) {
				String element = this.list.get(i);
				Boolean left = this.left.get(element);
				if ((k == null || element.compareTo(k) > 0) && 
				   ((left && i > 0 && element.compareTo(this.list.get(i - 1)) > 0) ||
				   (!left && i < (length - 1) && element.compareTo(this.list.get(i + 1)) > 0))) {
					k = element;
					pos = i;
				}
			}
			
			// no more permutations
			if (k == null){
				this.done = true;
			}
			else {
				// swap k and the element it is looking at
				int swap = this.left.get(k) ? pos - 1 : pos + 1;
				this.list.set(pos, this.list.get(swap));
				this.list.set(swap, k);

				// reverse the direction of all element larger than k
				for (int i = 0; i < length; i++) {
					if (this.list.get(i).compareTo(k) > 0) {
						this.left.put(this.list.get(i), !this.left.get(this.list.get(i)));
					}
				}
			}
			
			return rval;
		}
		
	}

}