
com.sindicetech.siren.util.IOUtils Maven / Gradle / Ivy
The newest version!
/**
* Copyright (c) 2014, Sindice Limited. All Rights Reserved.
*
* This file is part of the SIREn project.
*
* SIREn is a free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* SIREn is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.sindicetech.siren.util;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/**
 * Static helpers that rewrite a list of n-triples into a more compact
 * n-tuple form, grouping all objects that share a subject/predicate pair.
 */
public class IOUtils {

  /**
   * Flatten a list of triples to n-tuples containing many objects for the same
   * subject/predicate pair. Generate one n-tuple per subject/predicate pair.
   * <p>
   * The grouping is backed by a {@link HashMap}, so the order of the generated
   * tuples is unspecified. Use {@link #sortAndFlattenNTriples(String[])} when a
   * deterministic order is needed.
   *
   * @param values The list of n-triples. {@code null} entries and entries that
   *               cannot be split into subject, predicate and object are
   *               skipped.
   * @return The n-tuples concatenated, each one terminated by {@code ".\n"}.
   * @throws NullPointerException if {@code values} itself is {@code null}.
   */
  public static String flattenNTriples(final String[] values) {
    final Map<String, StringBuilder> map = new HashMap<String, StringBuilder>();
    return flattenNTriples(values, map);
  }

  /**
   * Sort and flatten a list of triples to n-tuples containing many objects for
   * the same subject/predicate pair. Generate one n-tuple per subject/predicate
   * pair. The tuples are ordered by the natural string order of their
   * "subject predicate" prefix, because the grouping is backed by a
   * {@link TreeMap}.
   *
   * @param values The list of n-triples. {@code null} entries and entries that
   *               cannot be split into subject, predicate and object are
   *               skipped.
   * @return The n-tuples concatenated, each one terminated by {@code ".\n"}.
   * @throws NullPointerException if {@code values} itself is {@code null}.
   */
  public static String sortAndFlattenNTriples(final String[] values) {
    final Map<String, StringBuilder> map = new TreeMap<String, StringBuilder>();
    return flattenNTriples(values, map);
  }

  /**
   * Group the objects of the given triples by their "subject predicate" prefix
   * and serialise one n-tuple per group.
   * <p>
   * A triple is split on its first two space characters; everything between
   * the second space and the last {@code '.'} (minus trailing whitespace) is
   * taken as the object. Entries without two spaces, without a dot located
   * after the second space, or without any object text are considered invalid
   * and are silently skipped.
   *
   * @param values The list of n-triples.
   * @param map    The accumulator, keyed by "subject predicate". Its iteration
   *               order determines the order of the tuples in the result.
   * @return The n-tuples concatenated, each one terminated by {@code ".\n"}.
   */
  private static String flattenNTriples(final String[] values,
                                        final Map<String, StringBuilder> map) {
    for (final String value : values) {
      if (value == null) {
        continue;
      }

      final int firstWhitespace = value.indexOf(' ');
      final int secondWhitespace = value.indexOf(' ', firstWhitespace + 1);
      final int lastDot = value.lastIndexOf('.');
      // The terminating dot must come after the predicate; a dot that only
      // appears inside the subject or predicate is not a terminator.
      if (firstWhitespace == -1 || secondWhitespace == -1
          || lastDot <= secondWhitespace) {
        continue; // probably invalid triple, just skip it
      }

      // Drop the whitespace separating the object from the terminating dot,
      // without assuming that exactly one such character is present.
      int objectEnd = lastDot;
      while (objectEnd > secondWhitespace
             && Character.isWhitespace(value.charAt(objectEnd - 1))) {
        objectEnd--;
      }
      if (objectEnd <= secondWhitespace) {
        continue; // nothing between the predicate and the dot
      }

      final String key = value.substring(0, secondWhitespace);
      // The object keeps its leading space so it can be appended directly.
      final String object = value.substring(secondWhitespace, objectEnd);

      StringBuilder tb = map.get(key);
      if (tb == null) {
        tb = new StringBuilder();
        tb.append(key);
        map.put(key, tb);
      }
      tb.append(object);
    }

    final StringBuilder result = new StringBuilder();
    for (final StringBuilder tb : map.values()) {
      result.append(tb).append('.').append("\n");
    }
    return result.toString();
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy