Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
com.github.jsonldjava.core.NormalizeUtils Maven / Gradle / Ivy
package com.github.jsonldjava.core;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import static com.github.jsonldjava.core.JSONLDUtils.*;
import static com.github.jsonldjava.core.RDFDatasetUtils.*;
class NormalizeUtils {
private UniqueNamer namer;
private Map bnodes;
private List quads;
private Options options;
public NormalizeUtils(List quads, Map bnodes, UniqueNamer namer, Options options) {
this.options = options;
this.quads = quads;
this.bnodes = bnodes;
this.namer = namer;
}
// generates unique and duplicate hashes for bnodes
public Object hashBlankNodes(Collection unnamed_) throws JSONLDProcessingError {
List unnamed = new ArrayList(unnamed_);
List nextUnnamed = new ArrayList();
Map> duplicates = new LinkedHashMap>();
Map unique = new LinkedHashMap();
// NOTE: not using the same structure as javascript here to avoid possible stack overflows
// hash quads for each unnamed bnode
for (int hui = 0;;hui++) {
if (hui == unnamed.size()) {
// done, name blank nodes
Boolean named = false;
List hashes = new ArrayList(unique.keySet());
Collections.sort(hashes);
for (String hash : hashes) {
String bnode = unique.get(hash);
namer.getName(bnode);
named = true;
}
// continue to hash bnodes if a bnode was assigned a name
if (named) {
// this resets the initial variables, so it seems like it has to go on the stack
// but since this is the end of the function either way, it might not have to
//hashBlankNodes(unnamed);
hui = -1;
unnamed = nextUnnamed;
nextUnnamed = new ArrayList();
duplicates = new LinkedHashMap>();
unique = new LinkedHashMap();
continue;
}
// name the duplicate hash bnods
else {
// names duplicate hash bnodes
// enumerate duplicate hash groups in sorted order
hashes = new ArrayList(duplicates.keySet());
Collections.sort(hashes);
// process each group
for (int pgi = 0;;pgi++) {
if (pgi == hashes.size()) {
// done, create JSON-LD array
//return createArray();
List normalized = new ArrayList();
// Note: At this point all bnodes in the set of RDF quads have been
// assigned canonical names, which have been stored in the 'namer' object.
// Here each quad is updated by assigning each of its bnodes its new name
// via the 'namer' object
// update bnode names in each quad and serialize
for (int cai = 0; cai < quads.size(); ++cai) {
Map quad = (Map) quads.get(cai);
for (String attr: new String[] { "subject", "object", "name" }) {
if (quad.containsKey(attr)) {
Map qa = (Map) quad.get(attr);
if (qa != null && "blank node".equals(qa.get("type")) &&
((String)qa.get("value")).indexOf("_:c14n") != 0) {
qa.put("value", namer.getName((String)qa.get(("value"))));
}
}
}
normalized.add(toNQuad(quad, quad.containsKey("name") && quad.get("name") != null ? (String)((Map) quad.get("name")).get("value") : null));
}
// sort normalized output
Collections.sort(normalized);
// handle output format
if (options.format != null) {
if ("application/nquads".equals(options.format)) {
String rval = "";
for (String n : normalized) {
rval += n;
}
return rval;
} else {
throw new JSONLDProcessingError("Unknown output format.")
.setType(JSONLDProcessingError.Error.UNKNOWN_FORMAT)
.setDetail("format", options.format);
}
}
String rval = "";
for (String n : normalized) {
rval += n;
}
return parseNQuads(rval);
}
// name each group member
List group = duplicates.get(hashes.get(pgi));
List results = new ArrayList();
for (int n = 0;;n++) {
if (n == group.size()) {
// name bnodes in hash order
Collections.sort(results, new Comparator() {
@Override
public int compare(HashResult a, HashResult b) {
int res = a.hash.compareTo(b.hash);
return res;
}
});
for (HashResult r : results) {
// name all bnodes in path namer in key-entry order
// Note: key-order is preserved in javascript
for (String key: r.pathNamer.existing().keySet()) {
namer.getName(key);
}
}
// processGroup(i+1);
break;
} else {
// skip already-named bnodes
String bnode = group.get(n);
if (namer.isNamed(bnode)) {
continue;
}
// hash bnode paths
UniqueNamer pathNamer = new UniqueNamer("_:b");
pathNamer.getName(bnode);
HashResult result = hashPaths(bnode, bnodes, namer, pathNamer);
results.add(result);
}
}
}
}
}
// hash unnamed bnode
String bnode = unnamed.get(hui);
String hash = hashQuads(bnode, bnodes, namer);
// store hash as unique or a duplicate
if (duplicates.containsKey(hash)) {
duplicates.get(hash).add(bnode);
nextUnnamed.add(bnode);
}
else if (unique.containsKey(hash)) {
List tmp = new ArrayList();
tmp.add(unique.get(hash));
tmp.add(bnode);
duplicates.put(hash, tmp);
nextUnnamed.add(unique.get(hash));
nextUnnamed.add(bnode);
unique.remove(hash);
}
else {
unique.put(hash, bnode);
}
}
}
private static class HashResult {
String hash;
UniqueNamer pathNamer;
}
/**
* Produces a hash for the paths of adjacent bnodes for a bnode,
* incorporating all information about its subgraph of bnodes. This
* method will recursively pick adjacent bnode permutations that produce the
* lexicographically-least 'path' serializations.
*
* @param id the ID of the bnode to hash paths for.
* @param bnodes the map of bnode quads.
* @param namer the canonical bnode namer.
* @param pathNamer the namer used to assign names to adjacent bnodes.
* @param callback(err, result) called once the operation completes.
*/
private static HashResult hashPaths(String id,
Map bnodes, UniqueNamer namer,
UniqueNamer pathNamer) {
try {
// create SHA-1 digest
MessageDigest md = MessageDigest.getInstance("SHA-1");
Map> groups = new LinkedHashMap>();
List groupHashes;
List quads = (List) ((Map) bnodes.get(id)).get("quads");
for (int hpi = 0;; hpi++) {
if (hpi == quads.size()) {
// done , hash groups
groupHashes = new ArrayList(groups.keySet());
Collections.sort(groupHashes);
for (int hgi = 0;; hgi++) {
if (hgi == groupHashes.size()) {
HashResult res = new HashResult();
res.hash = encodeHex(md.digest());
res.pathNamer = pathNamer;
return res;
}
// digest group hash
String groupHash = groupHashes.get(hgi);
md.update(groupHash.getBytes("UTF-8"));
// choose a path and namer from the permutations
String chosenPath = null;
UniqueNamer chosenNamer = null;
Permutator permutator = new Permutator(groups.get(groupHash));
while (true) {
Boolean contPermutation = false;
Boolean breakOut = false;
List permutation = permutator.next();
UniqueNamer pathNamerCopy = pathNamer.clone();
// build adjacent path
String path = "";
List recurse = new ArrayList();
for (String bnode : permutation) {
// use canonical name if available
if (namer.isNamed(bnode)) {
path += namer.getName(bnode);
}
else {
// recurse if bnode isn't named in the path yet
if (!pathNamerCopy.isNamed(bnode)) {
recurse.add(bnode);
}
path += pathNamerCopy.getName(bnode);
}
// skip permutation if path is already >= chosen path
if (chosenPath != null && path.length() >= chosenPath.length() && path.compareTo(chosenPath) > 0) {
//return nextPermutation(true);
if (permutator.hasNext()) {
contPermutation = true;
} else {
// digest chosen path and update namer
md.update(chosenPath.getBytes("UTF-8"));
pathNamer = chosenNamer;
// hash the nextGroup
breakOut = true;
}
break;
}
}
// if we should do the next permutation
if (contPermutation) continue;
// if we should stop processing this group
if (breakOut) break;
// does the next recursion
for (int nrn = 0;; nrn++) {
if (nrn == recurse.size()) {
//return nextPermutation(false);
if (chosenPath == null || path.compareTo(chosenPath) < 0) {
chosenPath = path;
chosenNamer = pathNamerCopy;
}
if (!permutator.hasNext()) {
// digest chosen path and update namer
md.update(chosenPath.getBytes("UTF-8"));
pathNamer = chosenNamer;
// hash the nextGroup
breakOut = true;
}
break;
}
// do recursion
String bnode = recurse.get(nrn);
HashResult result = hashPaths(bnode, bnodes, namer, pathNamerCopy);
path += pathNamerCopy.getName(bnode) + "<" + result.hash + ">";
pathNamerCopy = result.pathNamer;
// skip permutation if path is already >= chosen path
if (chosenPath != null && path.length() >= chosenPath.length() && path.compareTo(chosenPath) > 0) {
//return nextPermutation(true);
if (!permutator.hasNext()) {
// digest chosen path and update namer
md.update(chosenPath.getBytes("UTF-8"));
pathNamer = chosenNamer;
// hash the nextGroup
breakOut = true;
}
break;
}
// do next recursion
}
// if we should stop processing this group
if (breakOut) break;
}
}
}
// get adjacent bnode
Map quad = (Map) quads.get(hpi);
String bnode = getAdjacentBlankNodeName((Map) quad.get("subject"), id);
String direction = null;
if (bnode != null) {
// normal property
direction = "p";
}
else {
bnode = getAdjacentBlankNodeName((Map) quad.get("object"), id);
if (bnode != null) {
// reverse property
direction = "r";
}
}
if (bnode != null) {
// get bnode name (try canonical, path, then hash)
String name;
if (namer.isNamed(bnode)) {
name = namer.getName(bnode);
}
else if (pathNamer.isNamed(bnode)) {
name = pathNamer.getName(bnode);
}
else {
name = hashQuads(bnode, bnodes, namer);
}
// hash direction, property, end bnode name/hash
MessageDigest md1 = MessageDigest.getInstance("SHA-1");
//String toHash = direction + (String) ((Map) quad.get("predicate")).get("value") + name;
md1.update(direction.getBytes("UTF-8"));
md1.update(((String)((Map) quad.get("predicate")).get("value")).getBytes("UTF-8"));
md1.update(name.getBytes("UTF-8"));
String groupHash = encodeHex(md1.digest());
if (groups.containsKey(groupHash)) {
groups.get(groupHash).add(bnode);
} else {
List tmp = new ArrayList();
tmp.add(bnode);
groups.put(groupHash, tmp);
}
}
}
} catch (NoSuchAlgorithmException e) {
// TODO: i don't expect that SHA-1 is even NOT going to be available?
// look into this further
throw new RuntimeException(e);
} catch (UnsupportedEncodingException e) {
// TODO: i don't expect that UTF-8 is ever not going to be available either
throw new RuntimeException(e);
}
}
/**
* Hashes all of the quads about a blank node.
*
* @param id the ID of the bnode to hash quads for.
* @param bnodes the mapping of bnodes to quads.
* @param namer the canonical bnode namer.
*
* @return the new hash.
*/
private static String hashQuads(
String id,
Map bnodes,
UniqueNamer namer) {
// return cached hash
if (((Map) bnodes.get(id)).containsKey("hash")) {
return (String) ((Map) bnodes.get(id)).get("hash");
}
// serialize all of bnode's quads
List> quads = (List>) ((Map) bnodes.get(id)).get("quads");
List nquads = new ArrayList();
for (int i = 0; i < quads.size(); ++i) {
nquads.add(toNQuad(quads.get(i), quads.get(i).get("name") != null ? (String)((Map) quads.get(i).get("name")).get("value") : null, id));
}
// sort serialized quads
Collections.sort(nquads);
// return hashed quads
String hash = sha1hash(nquads);
((Map) bnodes.get(id)).put("hash", hash);
return hash;
}
/**
* A helper class to sha1 hash all the strings in a collection
*
* @param nquads
* @return
*/
private static String sha1hash(Collection nquads) {
try {
// create SHA-1 digest
MessageDigest md = MessageDigest.getInstance("SHA-1");
for (String nquad : nquads) {
md.update(nquad.getBytes("UTF-8"));
}
return encodeHex(md.digest());
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}
// TODO: this is something to optimize
private static String encodeHex(final byte[] data) {
String rval = "";
for (byte b : data) {
rval += String.format("%02x", b);
}
return rval;
}
/**
* A helper function that gets the blank node name from an RDF quad node
* (subject or object). If the node is a blank node and its value
* does not match the given blank node ID, it will be returned.
*
* @param node the RDF quad node.
* @param id the ID of the blank node to look next to.
*
* @return the adjacent blank node name or null if none was found.
*/
private static String getAdjacentBlankNodeName(Map node, String id) {
return "blank node".equals(node.get("type")) && (!node.containsKey("value") || !_equals(node.get("value"), id)) ? (String)node.get("value") : null;
}
private static class Permutator {
private List list;
private boolean done;
private Map left;
public Permutator(List list) {
this.list = (List)JSONLDUtils.clone(list);
Collections.sort(this.list);
this.done = false;
this.left = new LinkedHashMap();
for (String i : this.list) {
this.left.put(i, true);
}
}
/**
* Returns true if there is another permutation.
*
* @return true if there is another permutation, false if not.
*/
public boolean hasNext() {
return !this.done;
}
/**
* Gets the next permutation. Call hasNext() to ensure there is another one
* first.
*
* @return the next permutation.
*/
public List next() {
List rval = (List)JSONLDUtils.clone(this.list);
// Calculate the next permutation using Steinhaus-Johnson-Trotter permutation algoritm
// get largest mobile element k
// (mobile: element is grater than the one it is looking at)
String k = null;
int pos = 0;
int length = this.list.size();
for (int i = 0; i < length; ++i) {
String element = this.list.get(i);
Boolean left = this.left.get(element);
if ((k == null || element.compareTo(k) > 0) &&
((left && i > 0 && element.compareTo(this.list.get(i - 1)) > 0) ||
(!left && i < (length - 1) && element.compareTo(this.list.get(i + 1)) > 0))) {
k = element;
pos = i;
}
}
// no more permutations
if (k == null){
this.done = true;
}
else {
// swap k and the element it is looking at
int swap = this.left.get(k) ? pos - 1 : pos + 1;
this.list.set(pos, this.list.get(swap));
this.list.set(swap, k);
// reverse the direction of all element larger than k
for (int i = 0; i < length; i++) {
if (this.list.get(i).compareTo(k) > 0) {
this.left.put(this.list.get(i), !this.left.get(this.list.get(i)));
}
}
}
return rval;
}
}
}