All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.metafacture.triples.StreamToTriples Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2013, 2014 Deutsche Nationalbibliothek
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.metafacture.triples;

import org.metafacture.formeta.formatter.ConciseFormatter;
import org.metafacture.formeta.formatter.Formatter;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StandardEventNames;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;
import org.metafacture.framework.objects.Triple;
import org.metafacture.framework.objects.Triple.ObjectType;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Emits the literals which are received as triples such
 * that the name and value become the predicate and the object
 * of the triple. The record id containing the literal becomes
 * the subject.
 * 

* If 'redirect' is true, the value of the subject is determined * by using either the value of a literal named '_id', or for * individual literals by prefixing their name with '{to:ID}'. *

* Set 'recordPredicate' to encode a complete record in one triple. * The value of 'recordPredicate' is used as the predicate of the * triple. If 'recordPredicate' is set, no {to:ID}NAME-style * redirects are possible. * * @author Markus Michael Geipel * */ @Description("Emits the literals which are received as triples such " + "that the name and value become the predicate and the object " + "of the triple. The record id containing the literal becomes " + "the subject. " + "If 'redirect' is true, the value of the subject is determined " + "by using either the value of a literal named '_id', or for " + "individual literals by prefixing their name with '{to:ID}'. " + "Set 'recordPredicate' to encode a complete record in one triple. " + "The value of 'recordPredicate' is used as the predicate of the " + "triple. If 'recordPredicate' is set, no {to:ID}NAME-style " + "redirects are possible.") @In(StreamReceiver.class) @Out(Triple.class) @FluxCommand("stream-to-triples") public final class StreamToTriples extends DefaultStreamPipe> { private static final Pattern REDIRECT_PATTERN = Pattern.compile("^\\{to:(.+)}(.+)$"); private final List nameBuffer = new ArrayList(); private final List valueBuffer = new ArrayList(); private final List typeBuffer = new ArrayList(); private final Formatter formatter = new ConciseFormatter(); private boolean redirect; private String recordPredicate; private int nestingLevel; private int encodeLevel; private String predicateName; private String currentId; /** * Creates an instance of {@link StreamToTriples}. */ public StreamToTriples() { } /** * Checks whether redirection is enabled. * * @return true if redirection is enabled. */ public boolean isRedirect() { return redirect; } /** * Flags whether to set redirect. * * @param redirect true if to redirect */ public void setRedirect(final boolean redirect) { this.redirect = redirect; } /** * Gest the record predicate. 
* * @return the record predicate */ public String getRecordPredicate() { return recordPredicate; } /** * Sets the record predicate to encode a complete record in one triple. * * @param recordPredicate the record predicate */ public void setRecordPredicate(final String recordPredicate) { this.recordPredicate = recordPredicate; } @Override public void startRecord(final String identifier) { assert !isClosed(); currentId = identifier; if (recordPredicate != null) { encodeLevel = 0; startEncode(recordPredicate); } else { encodeLevel = 1; } nestingLevel = 1; } @Override public void endRecord() { assert !isClosed(); nestingLevel = 0; if (nestingLevel == encodeLevel) { endEncode(); } if (redirect) { for (int i = 0; i < nameBuffer.size(); ++i) { getReceiver().process(new Triple(currentId, nameBuffer.get(i), valueBuffer.get(i), typeBuffer.get(i))); } nameBuffer.clear(); valueBuffer.clear(); typeBuffer.clear(); } } @Override public void startEntity(final String name) { assert !isClosed(); if (nestingLevel > encodeLevel) { formatter.startGroup(name); } else { startEncode(name); } ++nestingLevel; } @Override public void endEntity() { assert !isClosed(); --nestingLevel; if (nestingLevel == encodeLevel) { endEncode(); } else { formatter.endGroup(); } } @Override public void literal(final String name, final String value) { assert !isClosed(); if (nestingLevel > encodeLevel) { if (nestingLevel == 1 && redirect && StandardEventNames.ID.equals(name)) { currentId = value; } else { formatter.literal(name, value); } } else { dispatch(name, value, ObjectType.STRING); } } private void startEncode(final String predicate) { predicateName = predicate; formatter.reset(); formatter.startGroup(""); } private void endEncode() { formatter.endGroup(); dispatch(predicateName, formatter.toString(), ObjectType.ENTITY); } private void dispatch(final String name, final String value, final ObjectType type) { if (redirect) { if (StandardEventNames.ID.equals(name)) { currentId = value; } else { final 
Matcher matcher = REDIRECT_PATTERN.matcher(name); if (matcher.find()) { getReceiver().process(new Triple(matcher.group(1), matcher.group(2), value, type)); } else { nameBuffer.add(name); valueBuffer.add(value); typeBuffer.add(type); } } } else { getReceiver().process(new Triple(currentId, name, value, type)); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy