
com.metreeca.text.matchers.GraphDBMatcher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of metreeca-text Show documentation
Show all versions of metreeca-text Show documentation
A lightweight NLP framework.
The newest version!
/*
* Copyright © 2013-2022 Metreeca srl
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metreeca.text.matchers;
import com.metreeca.json.Frame;
import com.metreeca.json.Values;
import com.metreeca.rdf4j.actions.GraphQuery;
import com.metreeca.rdf4j.services.Graph;
import com.metreeca.rest.Xtream;
import com.metreeca.rest.actions.Fill;
import com.metreeca.text.Match;
import com.metreeca.text.Notes;
import org.eclipse.rdf4j.model.*;
import java.math.BigDecimal;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import static com.metreeca.json.Frame.frame;
import static com.metreeca.json.Values.*;
import static com.metreeca.rdf4j.services.Graph.graph;
import static com.metreeca.rest.Toolbox.service;
import static com.metreeca.rest.services.Logger.logger;
import static java.util.regex.Pattern.compile;
import static java.util.stream.Collectors.joining;
public final class GraphDBMatcher implements Function, Stream>> {
private static final IRI HasRDFRank=iri("http://www.ontotext.com/owlim/RDFRank#hasRDFRank");
private static final Pattern EscapePattern=compile("[^\\s\\p{LD}]");
private static String lucene(final CharSequence keywords) {
return "\""+EscapePattern.matcher(keywords).replaceAll("\\\\$0")+"\"";
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
private final int size=1_000;
private final String index="entities";
private final Collection labels=Notes.Labels;
private Collection languages=Notes.Languages;
private final Graph graph=service(graph());
public GraphDBMatcher languages(final Collection languages) {
if ( languages == null || languages.stream().anyMatch(Objects::isNull) ) {
throw new NullPointerException("null languages");
}
this.languages=new HashSet<>(languages);
return this;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@Override public Stream> apply(final Stream anchors) {
return Xtream.from(anchors)
.batch(size)
.flatMap(new Fill>() // keep aligned with index definition
.model("prefix rdfs: \n"
+"\n"
+"prefix lucene: \n"
+"prefix index: \n"
+"\n"
+"prefix rank: \n"
+"\n"
+"construct { ?s ?p ?o; rank:hasRDFRank ?w } where {\n"
+"\n"
+"\tvalues ?a {\n"
+"\t\t{anchors}\n"
+"\t}\n"
+"\n"
+"\t[a index:{index}; lucene:query ?a; lucene:entities ?s].\n"
+"\t\t\n"
+"\t?s ?p ?o; rank:hasRDFRank5 ?w.\n"
+"\t\n"
+"\tfilter (isIRI(?o) || lang(?o) in ({languages}) && ?p in (\n" // !!! empty langs!
+"\t\t{labels}, \n"
+"\t\trdfs:description\n"
+"\t))\n"
+"\n"
+"}"
)
.value("index", index)
.value("anchors", batch -> batch.stream()
.map(s -> quote(lucene(s)))
.collect(joining("\n\t\t"))
)
.value("languages", languages.stream()
.map(Values::quote)
.collect(joining(", "))
)
.value("labels", labels.stream()
.map(Values::format)
.collect(joining(",\n\t\t"))
)
)
.flatMap(new GraphQuery().graph(graph))
.groupBy(Statement::getSubject)
.map(entry -> frame(entry.getKey(), entry.getValue()))
.flatMap(frame -> {
final double weight=frame
.decimal(HasRDFRank)
.orElse(BigDecimal.ZERO)
.doubleValue();
return frame.model()
.filter(s -> labels.contains(s.getPredicate()))
.map(Statement::getObject)
.map(Value::stringValue)
.map(label -> new Match<>(label, frame, weight));
});
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public static final class Indexer implements Runnable {
@Override public void run() {
service(graph()).update(connection -> { // ;( in a separate transaction
service(logger()).info(GraphDBMatcher.class, "updating RDF rank");
connection.add(bnode(), iri("http://www.ontotext.com/owlim/RDFRank#compute"), bnode());
return this;
});
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy