com.github.gumtreediff.io.TreeIoUtils Maven / Gradle / Ivy
The newest version!
/*
* This file is part of GumTree.
*
* GumTree is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GumTree is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with GumTree. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright 2011-2015 Jean-Rémy Falleri
* Copyright 2011-2015 Floréal Morandat
*/
package com.github.gumtreediff.io;
import com.github.gumtreediff.gen.Register;
import com.github.gumtreediff.gen.TreeGenerator;
import com.github.gumtreediff.matchers.MappingStore;
import com.github.gumtreediff.tree.*;
import com.github.gumtreediff.tree.TreeContext.MetadataSerializers;
import com.github.gumtreediff.tree.TreeContext.MetadataUnserializers;
import com.google.gson.stream.JsonWriter;
import javax.xml.namespace.QName;
import javax.xml.stream.*;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayDeque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import static com.github.gumtreediff.tree.TypeSet.type;
/**
* Class providing static utility IO methods.
* This class is not designed to be instantiated.
*/
public final class TreeIoUtils {
/** Hidden constructor: this class only exposes static helpers and is never instantiated. */
private TreeIoUtils() {
    // intentionally empty
}
/**
 * Creates a tree generator that reads trees from the XML representation.
 *
 * @return a fresh {@link XmlInternalGenerator}
 */
public static TreeGenerator fromXml() {
    TreeGenerator xmlGenerator = new XmlInternalGenerator();
    return xmlGenerator;
}
/**
 * Creates an XML tree generator and registers additional metadata unserializers on it.
 *
 * @param unserializers the unserializers added to the ones the generator already holds
 * @return the configured generator
 */
public static TreeGenerator fromXml(MetadataUnserializers unserializers) {
    XmlInternalGenerator xmlGenerator = new XmlInternalGenerator();
    MetadataUnserializers registered = xmlGenerator.getUnserializers();
    registered.addAll(unserializers);
    return xmlGenerator;
}
/**
 * Creates an XML serializer for the whole tree held by the given context.
 *
 * @param ctx the context whose root is serialized
 * @return the serializer
 */
public static TreeSerializer toXml(TreeContext ctx) {
    Tree wholeTree = ctx.getRoot();
    return toXml(ctx, wholeTree);
}
/**
 * Creates an XML serializer for the tree rooted at {@code root}.
 *
 * @param ctx  the context the tree belongs to
 * @param root the first node to serialize
 * @return a serializer backed by {@link XmlFormatter}
 */
public static TreeSerializer toXml(TreeContext ctx, Tree root) {
    TreeSerializer xmlSerializer = new TreeSerializer(ctx, root) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out)
                throws XMLStreamException {
            return new XmlFormatter(out, context);
        }
    };
    return xmlSerializer;
}
/**
 * Creates an annotated XML serializer for the whole tree held by the given context.
 *
 * @param ctx   the context whose root is serialized
 * @param isSrc whether the serialized tree is the source side of the mappings
 * @param m     the mappings used to look up the counterpart of each node
 * @return the serializer
 */
public static TreeSerializer toAnnotatedXml(TreeContext ctx, boolean isSrc, MappingStore m) {
    Tree wholeTree = ctx.getRoot();
    return toAnnotatedXml(ctx, wholeTree, isSrc, m);
}
/**
 * Creates an annotated XML serializer for the tree rooted at {@code root}.
 *
 * @param ctx   the context the tree belongs to
 * @param root  the first node to serialize
 * @param isSrc whether the serialized tree is the source side of the mappings
 * @param m     the mappings used to look up the counterpart of each node
 * @return a serializer backed by {@link XmlAnnotatedFormatter}
 */
public static TreeSerializer toAnnotatedXml(TreeContext ctx, Tree root, boolean isSrc, MappingStore m) {
    TreeSerializer annotatedSerializer = new TreeSerializer(ctx, root) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out)
                throws XMLStreamException {
            return new XmlAnnotatedFormatter(out, context, isSrc, m);
        }
    };
    return annotatedSerializer;
}
/**
 * Creates a compact XML serializer for the whole tree held by the given context.
 *
 * @param ctx the context whose root is serialized
 * @return the serializer
 */
public static TreeSerializer toCompactXml(TreeContext ctx) {
    Tree wholeTree = ctx.getRoot();
    return toCompactXml(ctx, wholeTree);
}
/**
 * Creates a compact XML serializer for the tree rooted at {@code root}.
 *
 * @param ctx  the context the tree belongs to
 * @param root the first node to serialize
 * @return a serializer backed by {@link XmlCompactFormatter}
 */
public static TreeSerializer toCompactXml(TreeContext ctx, Tree root) {
    TreeSerializer compactSerializer = new TreeSerializer(ctx, root) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out)
                throws Exception {
            return new XmlCompactFormatter(out, context);
        }
    };
    return compactSerializer;
}
/**
 * Creates a JSON serializer for the whole tree held by the given context.
 *
 * @param ctx the context whose root is serialized
 * @return the serializer
 */
public static TreeSerializer toJson(TreeContext ctx) {
    Tree wholeTree = ctx.getRoot();
    return toJson(ctx, wholeTree);
}
/**
 * Creates a JSON serializer for the tree rooted at {@code root}.
 *
 * @param ctx  the context the tree belongs to
 * @param root the first node to serialize
 * @return a serializer backed by {@link JsonFormatter}
 */
public static TreeSerializer toJson(TreeContext ctx, Tree root) {
    TreeSerializer jsonSerializer = new TreeSerializer(ctx, root) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out) {
            return new JsonFormatter(out, context);
        }
    };
    return jsonSerializer;
}
/**
 * Creates a Lisp-style serializer for the whole tree held by the given context.
 *
 * @param ctx the context whose root is serialized
 * @return the serializer
 */
public static TreeSerializer toLisp(TreeContext ctx) {
    Tree wholeTree = ctx.getRoot();
    return toLisp(ctx, wholeTree);
}
/**
 * Creates a Lisp-style serializer for the tree rooted at {@code tree}.
 *
 * @param ctx  the context the tree belongs to
 * @param tree the first node to serialize
 * @return a serializer backed by {@link LispFormatter}
 */
public static TreeSerializer toLisp(TreeContext ctx, Tree tree) {
    TreeSerializer lispSerializer = new TreeSerializer(ctx, tree) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out) {
            return new LispFormatter(out, context);
        }
    };
    return lispSerializer;
}
/**
 * Creates a DOT (graph description) serializer for the whole tree held by the given context.
 *
 * @param ctx the context whose root is serialized
 * @return the serializer
 */
public static TreeSerializer toDot(TreeContext ctx) {
    Tree wholeTree = ctx.getRoot();
    return toDot(ctx, wholeTree);
}
/**
 * Creates a DOT (graph description) serializer for the tree rooted at {@code root}.
 *
 * @param ctx  the context the tree belongs to
 * @param root the first node to serialize
 * @return a serializer backed by {@link DotFormatter}
 */
public static TreeSerializer toDot(TreeContext ctx, Tree root) {
    TreeSerializer dotSerializer = new TreeSerializer(ctx, root) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out) {
            return new DotFormatter(out, context);
        }
    };
    return dotSerializer;
}
/**
 * Creates a plain-text serializer for the whole tree held by the given context.
 *
 * @param ctx the context whose root is serialized
 * @return the serializer
 */
public static TreeSerializer toText(TreeContext ctx) {
    Tree wholeTree = ctx.getRoot();
    return toText(ctx, wholeTree);
}
/**
 * Creates a plain-text serializer for the tree rooted at {@code root}.
 *
 * @param ctx  the context the tree belongs to
 * @param root the first node to serialize
 * @return a serializer backed by {@link TextFormatter}
 */
public static TreeSerializer toText(TreeContext ctx, Tree root) {
    TreeSerializer textSerializer = new TreeSerializer(ctx, root) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out) {
            return new TextFormatter(out, context);
        }
    };
    return textSerializer;
}
/**
 * Creates a short-text serializer for the tree rooted at {@code root}.
 * No tree context is attached: the serializer is built with a {@code null} context.
 *
 * @param root the first node to serialize
 * @return a serializer backed by {@link ShortTextFormatter}
 */
public static TreeSerializer toShortText(Tree root) {
    TreeSerializer shortSerializer = new TreeSerializer(null, root) {
        @Override
        protected TreeFormatter newFormatter(TreeContext context, MetadataSerializers metadata, Writer out) {
            return new ShortTextFormatter(out, context);
        }
    };
    return shortSerializer;
}
/**
 * Base class of objects able to write themselves to a character or byte sink.
 * Subclasses implement {@link #writeTo(Writer)}; every other overload opens a UTF-8
 * writer on its target and delegates to it.
 */
public abstract static class AbstractSerializer {
    /**
     * Writes the serialized form to the given writer.
     *
     * @param writer the destination
     * @throws Exception if serialization fails
     */
    public abstract void writeTo(Writer writer) throws Exception;

    /**
     * Writes the serialized form to the given stream, encoded in UTF-8.
     * The stream is closed when this method returns.
     *
     * @param writer the destination stream
     * @throws Exception if serialization fails
     */
    public void writeTo(OutputStream writer) throws Exception {
        // FIXME Since the stream is already open, we should not close it, however due to semantic issue
        // it should stay like this
        try (OutputStreamWriter os = new OutputStreamWriter(writer, StandardCharsets.UTF_8)) {
            writeTo(os);
        }
    }

    /**
     * Returns the serialized form as a string.
     *
     * @throws RuntimeException wrapping any exception raised while serializing
     */
    @Override
    public String toString() {
        try (StringWriter s = new StringWriter()) {
            writeTo(s);
            return s.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes the serialized form to the file at the given path, encoded in UTF-8.
     *
     * @param file path of the destination file
     * @throws Exception if the file cannot be opened or serialization fails
     */
    public void writeTo(String file) throws Exception {
        // Use the charset constant, like the OutputStream overload, instead of a lookup by name.
        try (Writer w = Files.newBufferedWriter(Paths.get(file), StandardCharsets.UTF_8)) {
            writeTo(w);
        }
    }

    /**
     * Writes the serialized form to the given file, encoded in UTF-8.
     *
     * @param file the destination file
     * @throws Exception if the file cannot be opened or serialization fails
     */
    public void writeTo(File file) throws Exception {
        try (Writer w = Files.newBufferedWriter(file.toPath(), StandardCharsets.UTF_8)) {
            writeTo(w);
        }
    }
}
public abstract static class TreeSerializer extends AbstractSerializer {
private final TreeContext context;
private final Tree root;
final MetadataSerializers serializers = new MetadataSerializers();
public TreeSerializer(TreeContext ctx, Tree root) {
context = ctx;
this.root = root;
if (ctx != null)
serializers.addAll(ctx.getSerializers());
}
protected abstract TreeFormatter newFormatter(TreeContext ctx, MetadataSerializers serializers, Writer writer)
throws Exception;
public void writeTo(Writer writer) throws Exception {
TreeFormatter formatter = newFormatter(context, serializers, writer);
try {
writeTree(formatter, root);
} finally {
formatter.close();
}
}
private void forwardException(Exception e) {
throw new FormatException(e);
}
protected void writeTree(TreeFormatter formatter, Tree root) throws Exception {
formatter.startSerialization();
if (context != null)
writeAttributes(formatter, context.getMetadata());
formatter.endProlog();
try {
TreeVisitor.visitTree(root, new TreeVisitor() {
@Override
public void startTree(Tree tree) {
try {
assert tree != null;
formatter.startTree(tree);
writeAttributes(formatter, tree.getMetadata());
formatter.endTreeProlog(tree);
} catch (Exception e) {
forwardException(e);
}
}
@Override
public void endTree(Tree tree) {
try {
formatter.endTree(tree);
} catch (Exception e) {
forwardException(e);
}
}
});
} catch (FormatException e) {
throw e.getCause();
}
formatter.stopSerialization();
}
protected void writeAttributes(TreeFormatter formatter, Iterator> it) throws Exception {
while (it.hasNext()) {
Entry entry = it.next();
serializers.serialize(formatter, entry.getKey(), entry.getValue());
}
}
public TreeSerializer export(String name, MetadataSerializer serializer) {
serializers.add(name, serializer);
return this;
}
public TreeSerializer export(String... name) {
for (String n : name)
serializers.add(n, Object::toString);
return this;
}
}
/**
 * Receiver of the events emitted while a tree is serialized.
 * The methods are listed in the order {@code TreeSerializer.writeTree} calls them.
 */
public interface TreeFormatter {
    /** Called once, before anything else is written. */
    void startSerialization() throws Exception;

    /** Called once, after the metadata of the context and before the first node. */
    void endProlog() throws Exception;

    /** Called when a node is entered, before its metadata. */
    void startTree(Tree tree) throws Exception;

    /** Called after the metadata of a node, before the node's children are visited. */
    void endTreeProlog(Tree tree) throws Exception;

    /** Called when a node is left, after its children have been visited. */
    void endTree(Tree tree) throws Exception;

    /** Called once, after the last node. */
    void stopSerialization() throws Exception;

    /** Releases the resources held by the formatter. */
    void close() throws Exception;

    /** Writes one metadata entry, already converted to a string. */
    void serializeAttribute(String name, String value) throws Exception;
}
/** Converts a metadata value into its textual form. */
@FunctionalInterface
public interface MetadataSerializer {
    /**
     * @param object the metadata value
     * @return the text written for that value
     */
    String toString(Object object);
}
/** Rebuilds a metadata value from its textual form. */
@FunctionalInterface
public interface MetadataUnserializer {
    /**
     * @param value the text read for the metadata
     * @return the value rebuilt from that text
     */
    Object fromString(String value);
}
/**
 * Unchecked carrier for a checked exception. It keeps its own reference to the
 * wrapped exception so that {@link #getCause()} can return it typed as {@link Exception}.
 */
static class FormatException extends RuntimeException {
    private static final long serialVersionUID = 593766540545763066L;

    /** The wrapped exception, as given to the constructor. */
    Exception cause;

    /**
     * @param cause the exception to carry
     */
    public FormatException(Exception cause) {
        super(cause);
        this.cause = cause;
    }

    /** Returns the exception given to the constructor. */
    @Override
    public synchronized Exception getCause() {
        return this.cause;
    }
}
/**
 * Formatter whose callbacks all do nothing. Concrete formatters extend it and
 * override only the events they care about.
 */
static class TreeFormatterAdapter implements TreeFormatter {
    /** The context handed to the constructor; not used by the adapter itself. */
    protected final TreeContext context;

    protected TreeFormatterAdapter(TreeContext ctx) {
        this.context = ctx;
    }

    @Override
    public void startSerialization() throws Exception {
        // no-op
    }

    @Override
    public void endProlog() throws Exception {
        // no-op
    }

    @Override
    public void startTree(Tree tree) throws Exception {
        // no-op
    }

    @Override
    public void serializeAttribute(String name, String value) throws Exception {
        // no-op
    }

    @Override
    public void endTreeProlog(Tree tree) throws Exception {
        // no-op
    }

    @Override
    public void endTree(Tree tree) throws Exception {
        // no-op
    }

    @Override
    public void stopSerialization() throws Exception {
        // no-op
    }

    @Override
    public void close() throws Exception {
        // no-op
    }
}
/**
 * Common base of the XML formatters: owns the XML stream writer and handles the
 * start and end of the document.
 */
abstract static class AbsXmlFormatter extends TreeFormatterAdapter {
    /** XML writer over the destination, wrapped in an {@code IndentingXMLStreamWriter}. */
    protected final XMLStreamWriter writer;

    /**
     * @param w   the destination of the XML output
     * @param ctx the context of the serialized tree
     * @throws XMLStreamException if the XML writer cannot be created
     */
    protected AbsXmlFormatter(Writer w, TreeContext ctx) throws XMLStreamException {
        super(ctx);
        XMLStreamWriter rawWriter = XMLOutputFactory.newInstance().createXMLStreamWriter(w);
        this.writer = new IndentingXMLStreamWriter(rawWriter);
    }

    /** Writes the XML declaration. */
    @Override
    public void startSerialization() throws XMLStreamException {
        writer.writeStartDocument();
    }

    /** Ends the XML document. */
    @Override
    public void stopSerialization() throws XMLStreamException {
        writer.writeEndDocument();
    }

    /** Closes the XML writer. */
    @Override
    public void close() throws XMLStreamException {
        writer.close();
    }
}
/**
 * Verbose XML formatter: a {@code root} element containing a {@code context}
 * element followed by nested {@code tree} elements; metadata entries are
 * written as child elements.
 */
static class XmlFormatter extends AbsXmlFormatter {
    public XmlFormatter(Writer w, TreeContext ctx) throws XMLStreamException {
        super(w, ctx);
    }

    /** Opens the document, then the {@code root} and {@code context} elements. */
    @Override
    public void startSerialization() throws XMLStreamException {
        super.startSerialization();
        for (String element : new String[] {"root", "context"})
            writer.writeStartElement(element);
    }

    /** Closes the element opened last (the {@code context} element when called after the prolog). */
    @Override
    public void endProlog() throws XMLStreamException {
        writer.writeEndElement();
    }

    /** Opens a {@code tree} element carrying the type and, when available, label and position. */
    @Override
    public void startTree(Tree tree) throws XMLStreamException {
        writer.writeStartElement("tree");
        writer.writeAttribute("type", tree.getType().toString());
        if (tree.hasLabel()) {
            writer.writeAttribute("label", tree.getLabel());
        }
        if (Tree.NO_POS == tree.getPos()) {
            return;
        }
        writer.writeAttribute("pos", String.valueOf(tree.getPos()));
        writer.writeAttribute("length", String.valueOf(tree.getLength()));
    }

    /** Writes a metadata entry as an element named after the entry, with the value as text. */
    @Override
    public void serializeAttribute(String name, String value) throws XMLStreamException {
        writer.writeStartElement(name);
        writer.writeCharacters(value);
        writer.writeEndElement();
    }

    /** Closes the {@code tree} element of the node. */
    @Override
    public void endTree(Tree tree) throws XMLStreamException {
        writer.writeEndElement();
    }

    /** Closes the {@code root} element, then the document. */
    @Override
    public void stopSerialization() throws XMLStreamException {
        writer.writeEndElement();
        super.stopSerialization();
    }
}
/**
 * XML formatter that additionally writes, for each mapped node, the position and
 * length of the node it is mapped to ({@code other_pos} / {@code other_length}).
 */
static class XmlAnnotatedFormatter extends XmlFormatter {
    /** Finds the node mapped to a given node, or {@code null} when it is not mapped. */
    interface SearchOther {
        Tree lookup(Tree tree);
    }

    final SearchOther searchOther;

    /**
     * @param w     the destination of the XML output
     * @param ctx   the context of the serialized tree
     * @param isSrc {@code true} when the serialized tree is the source side of {@code m}
     * @param m     the mappings between the two trees
     * @throws XMLStreamException if the XML writer cannot be created
     */
    public XmlAnnotatedFormatter(Writer w, TreeContext ctx, boolean isSrc,
                                 MappingStore m) throws XMLStreamException {
        super(w, ctx);
        if (!isSrc) {
            searchOther = node -> {
                if (m.isDstMapped(node))
                    return m.getSrcForDst(node);
                return null;
            };
        } else {
            searchOther = node -> {
                if (m.isSrcMapped(node))
                    return m.getDstForSrc(node);
                return null;
            };
        }
    }

    @Override
    public void startTree(Tree tree) throws XMLStreamException {
        super.startTree(tree);
        Tree counterpart = searchOther.lookup(tree);
        boolean hasPosition = counterpart != null && Tree.NO_POS != counterpart.getPos();
        if (hasPosition) {
            writer.writeAttribute("other_pos", Integer.toString(counterpart.getPos()));
            writer.writeAttribute("other_length", Integer.toString(counterpart.getLength()));
        }
    }
}
/**
 * Compact XML formatter: each node becomes an element named after its type,
 * and labels and metadata entries become XML attributes.
 */
static class XmlCompactFormatter extends AbsXmlFormatter {
    public XmlCompactFormatter(Writer w, TreeContext ctx) throws XMLStreamException {
        super(w, ctx);
    }

    /** Opens the document and the {@code root} element. */
    @Override
    public void startSerialization() throws XMLStreamException {
        super.startSerialization();
        writer.writeStartElement("root");
    }

    /** Writes the node as an empty element when it has no children, as a start element otherwise. */
    @Override
    public void startTree(Tree tree) throws XMLStreamException {
        boolean childless = tree.getChildren().isEmpty();
        String elementName = tree.getType().toString();
        if (childless) {
            writer.writeEmptyElement(elementName);
        } else {
            writer.writeStartElement(elementName);
        }
        if (tree.hasLabel()) {
            writer.writeAttribute("label", tree.getLabel());
        }
    }

    /** Writes a metadata entry as an attribute of the element being written. */
    @Override
    public void serializeAttribute(String name, String value) throws XMLStreamException {
        writer.writeAttribute(name, value);
    }

    /** Closes the element of the node; childless nodes were written as empty elements. */
    @Override
    public void endTree(Tree tree) throws XMLStreamException {
        if (tree.getChildren().isEmpty()) {
            return;
        }
        writer.writeEndElement();
    }

    /** Closes the {@code root} element, then the document. */
    @Override
    public void stopSerialization() throws XMLStreamException {
        writer.writeEndElement();
        super.stopSerialization();
    }
}
/**
 * Formatter producing a parenthesized, Lisp-like text: one node per line,
 * indented by depth, with type, label, position and metadata.
 */
static class LispFormatter extends TreeFormatterAdapter {
    protected final Writer writer;
    /** Characters whose presence forces a value to be wrapped in double quotes. */
    protected final Pattern protectChars = Pattern.compile("[ ,\"]");
    /** Characters prefixed with a backslash inside a value. */
    protected final Pattern escapeChars = Pattern.compile("[\\\\\"]");
    /** Current depth, used for indentation. */
    int level = 0;

    protected LispFormatter(Writer w, TreeContext ctx) {
        super(ctx);
        this.writer = w;
    }

    @Override
    public void startSerialization() throws IOException {
        writer.write("((");
    }

    @Override
    public void endProlog() throws Exception {
        writer.append(") ");
    }

    /** Writes the indentation, then the type, label and position of the node. */
    @Override
    public void startTree(Tree tree) throws IOException {
        if (!tree.isRoot()) {
            writer.write("\n");
        }
        int written = 0;
        while (written < level) {
            writer.write(" ");
            written++;
        }
        level++;
        String pos;
        if (Tree.NO_POS == tree.getPos()) {
            pos = "";
        } else {
            pos = String.format("(%d %d)", tree.getPos(), tree.getLength());
        }
        String typeText = protect(tree.getType().toString());
        String labelText = protect(tree.getLabel());
        writer.write(String.format("(%s %s (%s", typeText, labelText, pos));
    }

    /** Writes one metadata entry as {@code (:name value)}. */
    @Override
    public void serializeAttribute(String name, String value) throws Exception {
        writer.append(String.format("(:%s %s) ", name, protect(value)));
    }

    @Override
    public void endTreeProlog(Tree tree) throws Exception {
        writer.append(") (");
    }

    // NOTE(review): startTree and endTreeProlog open two groups per node that stay open
    // (the node and its child list) while only one ")" is written here - confirm that
    // the resulting parentheses are meant to be balanced this way.
    @Override
    public void endTree(Tree tree) throws IOException {
        writer.write(")");
        level--;
    }

    @Override
    public void stopSerialization() throws IOException {
        writer.write(")");
    }

    /**
     * Escapes backslashes and double quotes, and wraps the result in double quotes
     * when it is empty or contains a space, a comma or a double quote.
     *
     * @param val the raw value
     * @return the value ready to be written
     */
    protected String protect(String val) {
        String escaped = escapeChars.matcher(val).replaceAll("\\\\$0");
        boolean needsQuotes = val.isEmpty() || protectChars.matcher(escaped).find();
        return needsQuotes ? String.format("\"%s\"", escaped) : escaped;
    }
}
/**
 * Formatter producing a DOT directed graph: one labelled vertex per node and one
 * edge from each parent to each of its children.
 */
static class DotFormatter extends TreeFormatterAdapter {
    protected final Writer writer;
    /** Source of vertex identifiers, shared by every formatter instance. */
    private static final AtomicLong idCounter = new AtomicLong();
    /** Identifier already given to each node seen by this formatter. */
    private final Map<Tree, String> idForTrees = new HashMap<>();

    protected DotFormatter(Writer w, TreeContext ctx) {
        super(ctx);
        writer = w;
    }

    @Override
    public void startSerialization() throws Exception {
        writer.write("digraph G {\n");
    }

    /** Writes the vertex of the node and, when it has a parent, the edge coming from it. */
    @Override
    public void startTree(Tree tree) throws Exception {
        String label = getCleanLabel(tree);
        writer.write(String.format("\t%s [label=\"%s\"];\n", id(tree), label));
        if (tree.getParent() != null)
            writer.write(String.format("\t%s -> %s;\n", id(tree.getParent()), id(tree)));
    }

    @Override
    public void stopSerialization() throws Exception {
        writer.write("}");
    }

    /**
     * Builds the vertex label from {@code tree.toString()}, cut to 30 characters.
     * When the text contains a double quote or a backslash, quotes, whitespace and
     * backslashes are removed so that the label cannot terminate or escape the
     * quoted string it is written in.
     */
    private String getCleanLabel(Tree tree) {
        String label = tree.toString();
        // The previous test looked for the literal two characters "\s", so a label with a
        // backslash but no quote (e.g. ending in '\') was written as is and could escape
        // the closing quote. Any backslash now triggers the clean-up.
        if (label.contains("\"") || label.contains("\\"))
            label = label
                    .replaceAll("\"", "")
                    .replaceAll("\\s", "")
                    .replaceAll("\\\\", "");
        if (label.length() > 30)
            label = label.substring(0, 30);
        return label;
    }

    /** Returns the identifier of the node, generating it the first time the node is seen. */
    private String id(Tree t) {
        return idForTrees.computeIfAbsent(t, key -> generateId());
    }

    private static String generateId() {
        return "id_" + idCounter.getAndIncrement();
    }
}
/**
 * Formatter producing a JSON document: an object holding the context metadata and
 * a {@code root} member, where each node is an object with its type, optional
 * label and position, metadata and a {@code children} array.
 */
static class JsonFormatter extends TreeFormatterAdapter {
    private final JsonWriter writer;

    public JsonFormatter(Writer w, TreeContext ctx) {
        super(ctx);
        JsonWriter json = new JsonWriter(w);
        json.setIndent(" ");
        this.writer = json;
    }

    /** Opens the top-level object and switches the indentation to a tab. */
    @Override
    public void startSerialization() throws IOException {
        writer.beginObject();
        writer.setIndent("\t");
    }

    /** Writes a metadata entry as a string member. */
    @Override
    public void serializeAttribute(String key, String value) throws IOException {
        writer.name(key);
        writer.value(value);
    }

    /** Names the member that holds the first node. */
    @Override
    public void endProlog() throws IOException {
        writer.name("root");
    }

    /** Opens the object of a node and writes its type, label and position. */
    @Override
    public void startTree(Tree t) throws IOException {
        writer.beginObject();
        writer.name("type");
        writer.value(t.getType().toString());
        if (t.hasLabel()) {
            writer.name("label");
            writer.value(t.getLabel());
        }
        if (Tree.NO_POS == t.getPos()) {
            return;
        }
        writer.name("pos");
        writer.value(Integer.toString(t.getPos()));
        writer.name("length");
        writer.value(Integer.toString(t.getLength()));
    }

    /** Opens the {@code children} array of a node. */
    @Override
    public void endTreeProlog(Tree tree) throws IOException {
        writer.name("children").beginArray();
    }

    /** Closes the {@code children} array, then the object of the node. */
    @Override
    public void endTree(Tree tree) throws IOException {
        writer.endArray().endObject();
    }

    /** Closes the top-level object. */
    @Override
    public void stopSerialization() throws IOException {
        writer.endObject();
    }

    /** Closes the JSON writer. */
    @Override
    public void close() throws IOException {
        writer.close();
    }
}
/**
 * Base of the plain-text formatters: writes one node per line, indented
 * according to its depth; subclasses decide how a single node is rendered.
 */
public abstract static class AbstractTextFormatter extends TreeFormatterAdapter {
    protected final Writer writer;
    /** Depth of the node being written. */
    int level = 0;

    public AbstractTextFormatter(Writer w, TreeContext ctx) {
        super(ctx);
        this.writer = w;
    }

    /** Renders a single node on the current line. */
    protected abstract void writeTree(Tree tree) throws IOException;

    /**
     * Writes {@code prefix} {@code level} times.
     *
     * @param level  the number of repetitions
     * @param prefix the text of one indentation step
     */
    protected void indent(int level, String prefix) throws IOException {
        int remaining = level;
        while (remaining > 0) {
            writer.write(prefix);
            remaining--;
        }
    }

    /** Starts a new line (except for the very first node), indents and renders the node. */
    @Override
    public void startTree(Tree tree) throws IOException {
        if (0 != level) {
            writer.write("\n");
        }
        indent(level, " ");
        level += 1;
        writeTree(tree);
    }

    /** Goes back to the depth of the parent. */
    @Override
    public void endTree(Tree tree) throws IOException {
        level -= 1;
    }
}
/** Text formatter rendering each node with its {@code toString()} form. */
public static class TextFormatter extends AbstractTextFormatter {
    public TextFormatter(Writer w, TreeContext ctx) {
        super(w, ctx);
    }

    @Override
    public void writeTree(Tree tree) throws IOException {
        String rendered = tree.toString();
        writer.write(rendered);
    }
}
/** Text formatter used by {@code toShortText}; each node is rendered with its {@code toString()} form. */
public static class ShortTextFormatter extends AbstractTextFormatter {
    public ShortTextFormatter(Writer w, TreeContext ctx) {
        super(w, ctx);
    }

    @Override
    public void writeTree(Tree tree) throws IOException {
        final String nodeText = tree.toString();
        writer.write(nodeText);
    }
}
/**
 * Tree generator reading the XML format: every {@code tree} element becomes a
 * node whose parent is the enclosing {@code tree} element; any other element is ignored.
 */
@Register(id = "xml", accept = "\\.gxml$")
// TODO Since it is not in the right package, I'm not even sure it is visible in the registry
// TODO should we move this class elsewhere (another package)
public static class XmlInternalGenerator extends TreeGenerator {
    static MetadataUnserializers defaultUnserializers = new MetadataUnserializers();
    final MetadataUnserializers unserializers = new MetadataUnserializers(); // FIXME should it be pushed up or not?
    private static final QName TYPE = new QName("type");
    private static final QName LABEL = new QName("label");
    private static final String POS = "pos";
    private static final String LENGTH = "length";
    /** Local name of the elements that describe a node. */
    private static final String TREE = "tree";

    static {
        defaultUnserializers.add(POS, Integer::parseInt);
        defaultUnserializers.add(LENGTH, Integer::parseInt);
    }

    /** Creates a generator holding the default unserializers ({@code pos} and {@code length}). */
    public XmlInternalGenerator() {
        unserializers.addAll(defaultUnserializers);
    }

    /**
     * Parses the XML document and builds the corresponding tree context.
     *
     * @param source the XML document
     * @return the context holding the parsed tree, never {@code null}
     * @throws IOException if the document cannot be parsed, a {@code tree} element
     *                     has no {@code type} attribute, or an attribute cannot be loaded
     */
    @Override
    protected TreeContext generate(Reader source) throws IOException {
        XMLEventReader r = null;
        try {
            XMLInputFactory fact = XMLInputFactory.newInstance();
            // The format uses neither a DTD nor entities: refuse them so that a crafted
            // document cannot make the parser read external resources.
            fact.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
            fact.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            TreeContext context = new TreeContext();
            // Stack of the nodes whose element is currently open, innermost first.
            ArrayDeque<Tree> trees = new ArrayDeque<>();
            r = fact.createXMLEventReader(source);
            while (r.hasNext()) {
                XMLEvent e = r.nextEvent();
                if (e instanceof StartElement) {
                    StartElement s = (StartElement) e;
                    if (!TREE.equals(s.getName().getLocalPart())) // FIXME need to deal with options
                        continue;
                    Attribute typeAttr = s.getAttributeByName(TYPE);
                    if (typeAttr == null)
                        throw new IOException("Missing 'type' attribute on <tree> element at line "
                                + s.getLocation().getLineNumber());
                    Type type = type(typeAttr.getValue());
                    Tree t = context.createTree(type, labelForAttribute(s, LABEL));
                    // FIXME this iterator has no type, due to the API. We have to cast it later
                    Iterator<?> it = s.getAttributes();
                    while (it.hasNext()) {
                        Attribute a = (Attribute) it.next();
                        unserializers.load(t, a.getName().getLocalPart(), a.getValue());
                    }
                    if (trees.isEmpty())
                        context.setRoot(t);
                    else
                        t.setParentAndUpdateChildren(trees.peekFirst());
                    trees.addFirst(t);
                } else if (e instanceof EndElement) {
                    if (!TREE.equals(((EndElement) e).getName().getLocalPart())) // FIXME need to deal with option
                        continue;
                    trees.removeFirst();
                }
            }
            return context;
        } catch (Exception e) {
            // Report the failure to the caller instead of printing it and returning null.
            if (e instanceof IOException)
                throw (IOException) e;
            throw new IOException("Unable to read the XML tree document", e);
        } finally {
            if (r != null) {
                try {
                    r.close();
                } catch (XMLStreamException ignored) {
                    // Nothing useful can be done if releasing the reader fails.
                }
            }
        }
    }

    /** Returns the value of the attribute, or {@code Tree.NO_LABEL} when the element does not have it. */
    private static String labelForAttribute(StartElement s, QName attrName) {
        Attribute attr = s.getAttributeByName(attrName);
        return attr == null ? Tree.NO_LABEL : attr.getValue();
    }

    /** Returns the unserializers used by this generator; additional ones can be registered on it. */
    public MetadataUnserializers getUnserializers() {
        return unserializers;
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy