org.metafacture.triples.StreamToTriples Maven / Gradle / Ivy
Show all versions of metafacture-triples Show documentation
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.metafacture.triples;
import org.metafacture.formeta.formatter.ConciseFormatter;
import org.metafacture.formeta.formatter.Formatter;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StandardEventNames;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;
import org.metafacture.framework.objects.Triple;
import org.metafacture.framework.objects.Triple.ObjectType;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Emits the literals which are received as triples such
* that the name and value become the predicate and the object
* of the triple. The record id containing the literal becomes
* the subject.
*
* If 'redirect' is true, the value of the subject is determined
* by using either the value of a literal named '_id', or for
* individual literals by prefixing their name with '{to:ID}'.
*
* Set 'recordPredicate' to encode a complete record in one triple.
* The value of 'recordPredicate' is used as the predicate of the
* triple. If 'recordPredicate' is set, no {to:ID}NAME-style
* redirects are possible.
*
* @author Markus Michael Geipel
*
*/
@Description("Emits the literals which are received as triples such " +
"that the name and value become the predicate and the object " +
"of the triple. The record id containing the literal becomes " +
"the subject. " +
"If 'redirect' is true, the value of the subject is determined " +
"by using either the value of a literal named '_id', or for " +
"individual literals by prefixing their name with '{to:ID}'. " +
"Set 'recordPredicate' to encode a complete record in one triple. " +
"The value of 'recordPredicate' is used as the predicate of the " +
"triple. If 'recordPredicate' is set, no {to:ID}NAME-style " +
"redirects are possible.")
@In(StreamReceiver.class)
@Out(Triple.class)
@FluxCommand("stream-to-triples")
public final class StreamToTriples extends DefaultStreamPipe> {
private static final Pattern REDIRECT_PATTERN = Pattern.compile("^\\{to:(.+)}(.+)$");
private final List nameBuffer = new ArrayList();
private final List valueBuffer = new ArrayList();
private final List typeBuffer = new ArrayList();
private final Formatter formatter = new ConciseFormatter();
private boolean redirect;
private String recordPredicate;
private int nestingLevel;
private int encodeLevel;
private String predicateName;
private String currentId;
/**
* Creates an instance of {@link StreamToTriples}.
*/
public StreamToTriples() {
}
/**
* Checks whether redirection is enabled.
*
* @return true if redirection is enabled.
*/
public boolean isRedirect() {
return redirect;
}
/**
* Flags whether to set redirect.
*
* @param redirect true if to redirect
*/
public void setRedirect(final boolean redirect) {
this.redirect = redirect;
}
/**
* Gest the record predicate.
*
* @return the record predicate
*/
public String getRecordPredicate() {
return recordPredicate;
}
/**
* Sets the record predicate to encode a complete record in one triple.
*
* @param recordPredicate the record predicate
*/
public void setRecordPredicate(final String recordPredicate) {
this.recordPredicate = recordPredicate;
}
@Override
public void startRecord(final String identifier) {
assert !isClosed();
currentId = identifier;
if (recordPredicate != null) {
encodeLevel = 0;
startEncode(recordPredicate);
}
else {
encodeLevel = 1;
}
nestingLevel = 1;
}
@Override
public void endRecord() {
assert !isClosed();
nestingLevel = 0;
if (nestingLevel == encodeLevel) {
endEncode();
}
if (redirect) {
for (int i = 0; i < nameBuffer.size(); ++i) {
getReceiver().process(new Triple(currentId, nameBuffer.get(i), valueBuffer.get(i), typeBuffer.get(i)));
}
nameBuffer.clear();
valueBuffer.clear();
typeBuffer.clear();
}
}
@Override
public void startEntity(final String name) {
assert !isClosed();
if (nestingLevel > encodeLevel) {
formatter.startGroup(name);
}
else {
startEncode(name);
}
++nestingLevel;
}
@Override
public void endEntity() {
assert !isClosed();
--nestingLevel;
if (nestingLevel == encodeLevel) {
endEncode();
}
else {
formatter.endGroup();
}
}
@Override
public void literal(final String name, final String value) {
assert !isClosed();
if (nestingLevel > encodeLevel) {
if (nestingLevel == 1 && redirect && StandardEventNames.ID.equals(name)) {
currentId = value;
}
else {
formatter.literal(name, value);
}
}
else {
dispatch(name, value, ObjectType.STRING);
}
}
private void startEncode(final String predicate) {
predicateName = predicate;
formatter.reset();
formatter.startGroup("");
}
private void endEncode() {
formatter.endGroup();
dispatch(predicateName, formatter.toString(), ObjectType.ENTITY);
}
private void dispatch(final String name, final String value, final ObjectType type) {
if (redirect) {
if (StandardEventNames.ID.equals(name)) {
currentId = value;
}
else {
final Matcher matcher = REDIRECT_PATTERN.matcher(name);
if (matcher.find()) {
getReceiver().process(new Triple(matcher.group(1), matcher.group(2), value, type));
}
else {
nameBuffer.add(name);
valueBuffer.add(value);
typeBuffer.add(type);
}
}
}
else {
getReceiver().process(new Triple(currentId, name, value, type));
}
}
}