com.sindicetech.siren.util.IOUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of siren-core Show documentation
SIREn core module
The newest version!
/**
 * Copyright (c) 2014, Sindice Limited. All Rights Reserved.
 *
 * This file is part of the SIREn project.
 *
 * SIREn is a free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * SIREn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public
 * License along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package com.sindicetech.siren.util;

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public class IOUtils {

  /**
   * Flatten a list of triples to n-tuples containing many objects for the same
   * subject/predicate pair. Generate one n-tuple per subject/predicate pair.
   * <p>
   * This is useful for the document-centric indexing approach. The flatten
   * representation is more efficient in term of index size than the plain
   * n-triples approach.
   * <p>
   * The tuples are collected in a {@link HashMap}, so their order in the
   * returned string is unspecified. Use
   * {@link #sortAndFlattenNTriples(String[])} for a deterministic order.
   *
   * @param values The list of n-triples. {@code null} entries and entries
   *               that do not look like a triple are skipped.
   * @return The n-tuples concatenated, each one terminated by {@code ".\n"}.
   */
  public static String flattenNTriples(final String[] values) {
    final Map<String, StringBuilder> map = new HashMap<String, StringBuilder>();
    return flattenNTriples(values, map);
  }

  /**
   * Sort and flatten a list of triples to n-tuples containing many objects for
   * the same subject/predicate pair. Generate one n-tuple per subject/predicate
   * pair. The tuples are ordered by subject/predicate (natural string order of
   * the {@link TreeMap} keys).
   * <p>
   * This is useful for the document-centric indexing approach. The sorted and
   * flatten representation is generally more efficient in term of index size
   * than the normal flatten approach.
   *
   * @param values The list of n-triples. {@code null} entries and entries
   *               that do not look like a triple are skipped.
   * @return The n-tuples concatenated, each one terminated by {@code ".\n"}.
   */
  public static String sortAndFlattenNTriples(final String[] values) {
    final Map<String, StringBuilder> map = new TreeMap<String, StringBuilder>();
    return flattenNTriples(values, map);
  }

  /**
   * Flatten a list of triples to n-tuples containing many objects for the same
   * subject/predicate pair. Generate one n-tuple per subject/predicate pair.
   * <p>
   * Each triple is split on its first two space characters: everything before
   * the second space is the subject/predicate key, everything between the
   * second space and the last dot is the object part. The iteration order of
   * {@code map} determines the order of the tuples in the result.
   *
   * @param values The list of n-triples.
   * @param map The map used to group the objects by subject/predicate key.
   * @return The n-tuples concatenated, each one terminated by {@code ".\n"}.
   */
  private static String flattenNTriples(final String[] values,
                                        final Map<String, StringBuilder> map) {
    for (final String value : values) {
      if (value == null) {
        continue;
      }
      final int firstWhitespace = value.indexOf(' ');
      final int secondWhitespace = value.indexOf(' ', firstWhitespace + 1);
      final int lastDot = value.lastIndexOf('.');
      if (firstWhitespace == -1 || secondWhitespace == -1 || lastDot == -1) {
        continue; // probably invalid triple, just skip it
      }
      if (lastDot < secondWhitespace) {
        // The only dot belongs to the subject or predicate (e.g. inside a
        // URI), so there is no terminating dot: skip the entry instead of
        // failing with a StringIndexOutOfBoundsException.
        continue;
      }
      // Drop the separator that precedes the terminating dot, but only if it
      // really is whitespace; otherwise the last character of the object
      // would be cut off for triples written as "<s> <p> <o>.".
      int objectEnd = lastDot;
      if (Character.isWhitespace(value.charAt(objectEnd - 1))) {
        objectEnd--;
      }
      final String key = value.substring(0, secondWhitespace);
      // The object keeps its leading space, which separates it from the key
      // and from the previously appended objects.
      final String object = value.substring(secondWhitespace, objectEnd);
      StringBuilder tb = map.get(key);
      if (tb == null) {
        tb = new StringBuilder();
        tb.append(key);
        map.put(key, tb);
      }
      tb.append(object);
    }
    final StringBuilder result = new StringBuilder();
    for (final StringBuilder tb : map.values()) {
      result.append(tb).append('.').append("\n");
    }
    return result.toString();
  }

}