All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.uima.json.JsonCasSerializer Maven / Gradle / Ivy

Go to download

JSON support for UIMA SDK. This module is deprecated. Use https://github.com/apache/uima-uimaj-io-jsoncas instead.

There is a newer version: 3.5.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.json;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CommonArrayFS;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Marker;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.CasSerializerSupport;
import org.apache.uima.cas.impl.CasSerializerSupport.CasDocSerializer;
import org.apache.uima.cas.impl.CasSerializerSupport.CasSerializerSupportSerialize;
import org.apache.uima.cas.impl.FeatureImpl;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.impl.MarkerImpl;
import org.apache.uima.cas.impl.TypeImpl;
import org.apache.uima.cas.impl.TypeSystemImpl;
import org.apache.uima.cas.impl.XmiSerializationSharedData;
import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
import org.apache.uima.internal.util.IntVector;
import org.apache.uima.internal.util.Misc;
import org.apache.uima.internal.util.PositiveIntSet;
import org.apache.uima.internal.util.PositiveIntSet_impl;
import org.apache.uima.internal.util.XmlElementName;
import org.apache.uima.internal.util.function.IntConsumer_withIOException;
import org.apache.uima.internal.util.rb_trees.RedBlackTree;
import org.apache.uima.jcas.cas.BooleanArray;
import org.apache.uima.jcas.cas.ByteArray;
import org.apache.uima.jcas.cas.DoubleArray;
import org.apache.uima.jcas.cas.EmptyList;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.FloatArray;
import org.apache.uima.jcas.cas.IntegerArray;
import org.apache.uima.jcas.cas.LongArray;
import org.apache.uima.jcas.cas.NonEmptyFSList;
import org.apache.uima.jcas.cas.NonEmptyFloatList;
import org.apache.uima.jcas.cas.NonEmptyIntegerList;
import org.apache.uima.jcas.cas.NonEmptyStringList;
import org.apache.uima.jcas.cas.ShortArray;
import org.apache.uima.jcas.cas.Sofa;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.json.impl.JsonContentHandlerJacksonWrapper;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.io.SerializedString;

/**
 * 

CAS serializer for JSON formats.

*

Writes a CAS in a JSON format.

* *

To use,

*
    *
  • create an instance of this class,
  • *
  • (optionally) configure the instance, and then
  • *
  • call serialize on the instance, optionally passing in additional parameters.
* *

After the 1st 2 steps, the serializer instance may be used for multiple calls (on multiple threads) to * the 3rd serialize step, if all calls use the same configuration.

* *

There are "convenience" static serialize methods that do these three steps for common configurations.

* *

Parameters can be configured in this instance (I), and/or as part of the serialize(S) call.

* *

The parameters that can be configured are:

*
    *
  • (S) The CAS to serialize *
  • (S) where to put the output - an OutputStream, Writer, or File
  • *
  • (I,S) a type system - (default null) if supplied, it is used to "filter" types and features that are serialized. If provided, only * those that exist in the passed in type system are included in the serialization
  • *
  • (I,S) a flag for prettyprinting - default false (no prettyprinting)
  • *
* *

For Json serialization, additional configuration from the Jackson implementation can be configured

* on 2 associated Jackson instances: *
  • JsonFactory
  • *
  • JsonGenerator
* using the standard Jackson methods on the associated JsonFactory instance; * see the Jackson JsonFactory and JsonGenerator javadocs for details. * *

These 2 Jackson objects are settable/gettable from an instance of this class. * They are created if not supplied by the caller.

* *

Once this instance is configured, the serialize method is called * to serialize a CAS to an output.

* *

Instances of this class must be used on only one thread while configuration is being done; * afterwards, multiple threads may use the configured instance, to call serialize.

*/
public class JsonCasSerializer {

  // Pre-built Jackson SerializedString instances for the reserved JSON property names
  // written by this serializer.

  // key of the optional context object written first inside the top-level object
  private static final SerializedString CONTEXT_NAME = new SerializedString("_context");
  // key (inside _context) whose value is the configured type system reference string
  private static final SerializedString TYPE_SYSTEM_NAME = new SerializedString("_type_system");
  // key (inside _context) of the object holding one entry per type
  private static final SerializedString TYPES_NAME = new SerializedString("_types");
  // key (inside a type entry) whose value is the full type name
  private static final SerializedString ID_NAME = new SerializedString("_id");
  // key (inside a type entry) of the array listing the type's used subtypes
  private static final SerializedString SUB_TYPES_NAME = new SerializedString("_subtypes");
  // key (inside a type entry) of the object mapping feature short names to a feature-kind label
  private static final SerializedString FEATURE_TYPES_NAME = new SerializedString("_feature_types");
  // NOTE(review): the next three look like the feature-kind labels produced by featureTypeLabel(),
  // which is not visible in this part of the file - confirm
  private static final SerializedString FEATURE_REFS_NAME = new SerializedString("_ref");
  private static final SerializedString FEATURE_ARRAY_NAME = new SerializedString("_array");
  private static final SerializedString FEATURE_BYTE_ARRAY_NAME = new SerializedString("_byte_array");
  // key of the object holding the FSs that are written by id rather than under a view
  private static final SerializedString REFERENCED_FSS_NAME = new SerializedString("_referenced_fss");
  // key of the object holding, per view, the FSs grouped by type
  private static final SerializedString VIEWS_NAME = new SerializedString("_views");
  // key written inside an FS object (embedded or written by id) naming its type
  private static final SerializedString TYPE_NAME = new SerializedString("_type");
  // NOTE(review): not referenced in the visible part of the file - confirm usage
  private static final SerializedString COLLECTION_NAME = new SerializedString("_collection");
  // key of the object holding view membership changes, written only for delta serialization
  private static final SerializedString DELTA_CAS_NAME = new SerializedString("_delta_cas");
  // keys (inside a delta view entry) of the added / deleted / reindexed member id arrays
  private static final SerializedString ADDED_MEMBERS_NAME = new SerializedString("added_members");
  private static final SerializedString DELETED_MEMBERS_NAME = new SerializedString("deleted_members");
  private static final SerializedString REINDEXED_MEMBERS_NAME = new SerializedString("reindexed_members");

  /**
   *

The serialization can optionally include context information in addition to the feature structures.

* *

This context information is specified, per used-type.

* *

It can be further subdivided into 3 parts:

*
    *
  1. What their (used) subtypes are. This enables iterating over a type * and all of its subtypes, e.g. an iterator over all "Annotations".
  2. *
  3. whether or not to include the map from short type names to their fully qualified equivalents.
  4. *
  5. Information to enable deserialization of some ambiguous values, depending on the range type of a feature *
* *

Some of these may be omitted, if not wanted. This enum allows specifying what to omit.

* */ public enum JsonContextFormat { omitContext, // omit the entire context omitSubtypes, omitExpandedTypeNames, } private final CasSerializerSupport css = new CasSerializerSupport(); // for testing CasSerializerSupport getCss() { return css; } private JsonFactory jsonFactory = null; private boolean isDynamicEmbedding = true; private boolean isWithContext = true; private boolean isWithSubtypes = true; private boolean isWithExpandedTypeNames = true; private boolean isOmit0Values = false; // https://issues.apache.org/jira/browse/UIMA-4117 private String typeSystemReference; /*********************************************** * C O N S T R U C T O R S * ***********************************************/ /** * Creates a new JsonCasSerializer */ public JsonCasSerializer() { } /************************************************** * J S O N * **************************************************/ /**************************************************** * Static JSON Serializer methods for convenience * * * * Note: these are named jsonSerialize * * The non-static methods * * are named serializeJson * ****************************************************/ /** * Serializes a CAS using JSON * * @param aCAS * CAS to serialize. * @param output * a File, OutputStream or Writer to which to write the XMI document * * @throws IOException if there was an IOException */ public static void jsonSerialize(CAS aCAS, Object output) throws IOException { jsonSerialize(aCAS, null, output, false, null, null); } /** * Serializes a CAS to an output (File, OutputStream, XMI stream, or Writer). * The supplied typesystem filters the output * * @param aCAS * CAS to serialize. * @param aTargetTypeSystem * type system used for filtering what gets serialized. Any types or features not in the * target type system will not be serialized. A null value indicates no filtering, that is, * that all types and features will be serialized. 
* @param output
   *          the File, OutputStream, or Writer that receives the JSON document
   *
   * @throws IOException if there was an IOException
   */
  public static void jsonSerialize(CAS aCAS, TypeSystem aTargetTypeSystem, Object output)
      throws IOException {
    // Delegate to the fully parameterized variant: compact output, and neither a marker
    // nor shared data.
    final boolean prettyPrint = false;
    final Marker noMarker = null;
    final XmiSerializationSharedData noSharedData = null;
    jsonSerialize(aCAS, aTargetTypeSystem, output, prettyPrint, noMarker, noSharedData);
  }

  /**
   * Serializes a Delta CAS to an output (File, Writer, or OutputStream).
   * This version of this method allows many options to be configured.
   *
   * @param aCAS
   *          CAS to serialize.
   * @param aTargetTypeSystem
   *          type system to which the produced JSON will conform. Any types or features not in the
   *          target type system will not be serialized. A null value indicates that all types and
   *          features will be serialized.
   * @param output
   *          File, Writer, or OutputStream to which to write the JSON document
   * @param aPrettyPrint
   *          if true the JSON output will be formatted with newlines and indenting. If false it
   *          will be unformatted.
   * @param aMarker
   *          an optional object used to determine which FeatureStructures and modifications were
   *          created after the mark was set. Used to serialize a Delta CAS consisting of only new
   *          FSs and views and preexisting FSs and Views that have been modified. If null, full
   *          serialization is done.
   *          See the JavaDocs for {@link Marker} for details.
* @param sharedData optional, used for delta serialization (not yet supported) * @throws IOException if there was an IOException */ public static void jsonSerialize(CAS aCAS, TypeSystem aTargetTypeSystem, Object output, boolean aPrettyPrint, Marker aMarker, XmiSerializationSharedData sharedData) throws IOException { JsonCasSerializer ser = new JsonCasSerializer(); ser.setFilterTypes((TypeSystemImpl)aTargetTypeSystem); ser.setPrettyPrint(aPrettyPrint); ser.serialize(aCAS, output, sharedData, aMarker); } /************************************************************************************* * Multi-step api * * 1) Create an instance of this class and use for configuration, specifying or defaulting * type system to use for filtering (default - no filtering) * prettyprinting (default - false) * * 1b) Do any additional wanted configuration on the instance of this class * instance.prettyPrint(true/false); * instance.useJsonFactory(factory) * instance.filterTypes(typeSystem) * instance.errorHandler(errorHandler) * instance.jsonFormat(EnumSet.of(x, y, z)) - default is none of the settings * * instance.getGenerator() to further configure the generator if the defaults are not what is wanted. * * 2) call its serializeJson method, passing in the CAS, and an output (Writer/Outputstream/File) * *************************************************************************************/ /** * Serialize a Cas to an Output, using configurations set on this instance. 
* Constructs a JsonContentHandlerJacksonWrapper, using configured JsonFactory and prettyprint settings if any * @param cas - the CAS to serialize * @param output - where the output goes, an OutputStream, Writer, or File * @throws IOException if there was an IOException */ public void serialize(CAS cas, Object output) throws IOException { serialize(cas, output, null, null); } public void serialize(CAS cas, Object output, XmiSerializationSharedData sharedData, Marker marker) throws IOException { JsonContentHandlerJacksonWrapper jch; try { jch = new JsonContentHandlerJacksonWrapper(jsonFactory, output, css.isFormattedOutput); } catch (SAXException e) { throw new IOException(e); } serialize(cas, jch, sharedData, marker); } /** * Serialize a Cas to an Output configured in the passed in JsonContentHandlerJacksonWrapper * Constructs a new CasDocSerializer instance to do the serialization, * configured using this class's Delta marker setting (if any) * @param cas The CAS to serialize * @param jch the configured content handler * @throws IOException if there was an IOException */ public void serialize(CAS cas, JsonContentHandlerJacksonWrapper jch) throws IOException { serialize(cas, jch, null, null); } public void serialize(CAS cas, JsonContentHandlerJacksonWrapper jch, XmiSerializationSharedData sharedData, Marker marker) throws IOException { JsonDocSerializer ser = new JsonDocSerializer(jch, ((CASImpl) cas).getBaseCAS(), sharedData, (MarkerImpl) marker); try { ser.cds.needNameSpaces = false; ser.cds.serialize(); } catch (Exception e) { if (e instanceof IOException) { throw (IOException) e; } throw new RuntimeException(e); } } /******************************************************** * Routines to set/reset configuration * ********************************************************/ /** * set or reset the pretty print flag (default is false) * @param pp true to do pretty printing of output * @return the original instance, possibly updated */ public JsonCasSerializer 
setPrettyPrint(boolean pp) { css.setPrettyPrint(pp); return this; } /** * set which JsonFactory instance to use; if null, a new instance is used * this can be used to preconfigure the JsonFactory instance * @param jsonFactory - * @return the original instance, possibly updated */ public JsonCasSerializer setJsonFactory(JsonFactory jsonFactory) { this.jsonFactory = jsonFactory; return this; } /** * pass in a type system to use for filtering what gets serialized; * only those types and features which are defined this type system are included. * @param ts the filter * @return the original instance, possibly updated */ public JsonCasSerializer setFilterTypes(TypeSystemImpl ts) { css.setFilterTypes(ts); return this; } public JsonCasSerializer setTypeSystemReference(String reference) { typeSystemReference = reference; return this; } // not done here, done on serialize call, because typically changes for each call // /** // * set the Marker to specify delta cas serialization // * forces static embedding mode // * @param m - the marker // * @return the original instance, possibly updated // */ // public JsonCasSerializer setDeltaCas(Marker m, XmiSerializationSharedData sharedData) { // css.setDeltaCas(m); // setStaticEmbedding(); // delta requires static embedding mode // return this; // } /** * set an error handler to receive information about errors * @param eh the error handler * @return the original instance, possibly updated */ public JsonCasSerializer setErrorHandler(ErrorHandler eh) { css.setErrorHandler(eh); return this; } /** * Sets static embedding mode * @return the original instance, possibly updated */ public JsonCasSerializer setStaticEmbedding() { isDynamicEmbedding = false; return this; } /** * sets which Json context format to use when serializing * @param format the format to use for the serialization * Specifying the context flag also specifies all 3 subflags * Specifying one of the subflags as true sets the context flag to true if it isn't already * 
@return the original instance, possibly updated */ public JsonCasSerializer setJsonContext(JsonContextFormat format) { switch (format) { case omitContext: isWithContext = false; isWithSubtypes = false; isWithExpandedTypeNames = false; break; case omitSubtypes: isWithSubtypes = false; break; case omitExpandedTypeNames: isWithExpandedTypeNames = false; break; } return this; } public JsonCasSerializer setOmit0Values(boolean omitDefaultValues) { isOmit0Values = omitDefaultValues; return this; } private static class MapType2Subtypes extends RedBlackTree { /** * * @param type main type * @param subtype subtype of main type * @return true if added, false if already was there */ boolean addSubtype(int type, int subtype) { IntVector iv = get(type); if (null == iv) { iv = new IntVector(); iv.add(subtype); put(type, iv); return true; } if (iv.contains(subtype)) { return false; } iv.add(subtype); return true; } } class JsonDocSerializer extends CasSerializerSupportSerialize { private final CasDocSerializer cds; private final JsonContentHandlerJacksonWrapper jch; private final JsonGenerator jg; private final Map serializedStrings = new HashMap<>(); private final Map usedTypeName2XmlElementName; private final MapType2Subtypes mapType2Subtypes = new MapType2Subtypes(); private final List parentTypesWithNoInstances = new ArrayList<>(); private int lastEncodedTypeCode; private boolean startedReferencedFSs; private final boolean isOmitDefaultValues; private final boolean isWithContext; private final boolean isWithSubtypes; private boolean indexId; // true causes fs to be listed as "id" : { ...}, false as "type" : [ {...} private boolean isEmbedded = false; // true for embedded FSs, causes _type to be included private boolean isEmbeddedFromFsFeature; // used for NL formatting, false if embedded due to Array or List private boolean startedFeatureTypes; private JsonDocSerializer(ContentHandler ch, CASImpl cas, XmiSerializationSharedData sharedData, MarkerImpl marker) { cds = css.new 
CasDocSerializer(ch, cas, sharedData, marker, this, JsonCasSerializer.this.isDynamicEmbedding); this.isOmitDefaultValues = JsonCasSerializer.this.isOmit0Values; isWithSubtypes = JsonCasSerializer.this.isWithSubtypes; jch = (JsonContentHandlerJacksonWrapper) ch; jg = jch.getJsonGenerator(); isWithContext = JsonCasSerializer.this.isWithContext || isWithSubtypes || isWithExpandedTypeNames; usedTypeName2XmlElementName = new HashMap<>(cds.tsi.getNumberOfTypes()); } @Override protected void initializeNamespaces() { if (cds.sharedData != null && (null != cds.sharedData.getOutOfTypeSystemElements() || cds.sharedData.hasOutOfTypeSystemArrayElements())) { throw new UnsupportedOperationException("Can't do JSON serialization " + "if there are out-of-type-system elements," + " because there's no type information available (needed for _context)"); } } @Override protected void writeViews() throws Exception { if (!cds.isDelta) { return; } jch.writeNlJustBeforeNext(); jg.writeFieldName(DELTA_CAS_NAME); jg.writeStartObject(); cds.writeViewsCommons(); // encodes cas.sofaCount + 1 elements jg.writeEndObject(); // and end of views property } @Override protected void writeFeatureStructures(int elementCount /* not used */ ) throws Exception{ jch.withoutNl(); // set up prettyprint mode so this class controls it jg.writeStartObject(); // container for (maybe) context, fss (2 parts), and (maybe) delta view info if (isWithContext) { serializeJsonLdContext(); } jch.writeNlJustBeforeNext(); // write the reachable from indexes FS indexId = false; jg.writeFieldName(VIEWS_NAME); jg.writeStartObject(); final List[] byViewByTypeFSs = sortByViewType(); for (int viewNbr = 1; viewNbr <= byViewByTypeFSs.length; viewNbr++) { // viewNbr starts at 1 lastEncodedTypeCode = -1; final List fssInView = byViewByTypeFSs[viewNbr - 1]; final Sofa sofa = cds.getSofa(viewNbr); if (sofa == null && fssInView.size() == 0) { continue; // skip non-existent initial view with no sofa and no elements } 
jch.writeNlJustBeforeNext(); String viewName = (null == sofa) ? CAS.NAME_DEFAULT_SOFA : sofa.getSofaID(); jg.writeFieldName(viewName); // view namne jg.writeStartObject(); for (TOP fs : fssInView) { cds.encodeFS(fs); } if (lastEncodedTypeCode != -1) { jg.writeEndArray(); // of array of types under a fs } jg.writeEndObject(); } jg.writeEndObject(); // end of value for _views // write the non-embeddable referenced FSs indexId = true; startedReferencedFSs = false; cds.encodeQueued(); if (startedReferencedFSs) { jg.writeEndObject(); // of all referenced FSs } } @Override protected void writeEndOfSerialization() throws IOException { jg.writeEndObject(); // wrapper of _context and cas jg.flush(); } // sort the by-view by-type set // previously Serialized /** * @return the List[] returned by cds.indexedFSs, but with each view sorted by type */ private List[] sortByViewType() { @SuppressWarnings("unchecked") final List[] r = new List[cds.indexedFSs.length]; int i = 0; for (final List fss : cds.indexedFSs) { r[i] = (fss == null) ? Collections.EMPTY_LIST : (List) ((ArrayList)fss).clone(); r[i++].sort(cds.sortFssByType); } return r; } @Override protected void writeView(Sofa sofa, Collection members) throws IOException { jch.writeNlJustBeforeNext(); String sofaXmiId = (null == sofa) ? 
"0" : cds.getXmiId(sofa); jg.writeArrayFieldStart(sofaXmiId); writeViewMembers(members); //check for out-of-typesystem members if (cds.sharedData != null) { List ootsMembers = cds.sharedData.getOutOfTypeSystemViewMembers(sofaXmiId); jch.writeNlJustBeforeNext(); writeViewMembers(ootsMembers); } jg.writeEndArray(); } private void writeViewForDeltas(SerializedString kind, Collection deltaMembers) throws IOException { jg.writeFieldName(kind); jg.writeStartArray(); writeViewMembers(deltaMembers); jg.writeEndArray(); } @Override protected void writeView(Sofa sofa, Collection added, Collection deleted, Collection reindexed) throws IOException { jch.writeNlJustBeforeNext(); jg.writeFieldName(cds.getXmiId(sofa)); jg.writeStartObject(); writeViewForDeltas(ADDED_MEMBERS_NAME, added); writeViewForDeltas(DELETED_MEMBERS_NAME, deleted); writeViewForDeltas(REINDEXED_MEMBERS_NAME, reindexed); jg.writeEndObject(); } private void writeViewMembers(Collection members) throws IOException { int nextBreak = CasSerializerSupport.PP_ELEMENTS; int i = 0; for (TOP member : members) { int xmiId = cds.getXmiIdAsInt(member); if (xmiId == 0) { continue; } if (i++ > nextBreak) { jch.writeNlJustBeforeNext(); nextBreak += CasSerializerSupport.PP_ELEMENTS; } jg.writeNumber(xmiId); } } /* * version for oots data */ private void writeViewMembers(List members) throws IOException { int nextBreak = CasSerializerSupport.PP_ELEMENTS; int i = 0; for (String xmiId : members) { if (null == xmiId || xmiId.length() == 0) { continue; } if (i++ > nextBreak) { jch.writeNlJustBeforeNext(); nextBreak += CasSerializerSupport.PP_ELEMENTS; } jg.writeNumber(Integer.parseInt(xmiId)); } } /** *

JSON: serialize context info

* *

The context has several parts. *

The typeSystemReference is an optional URI to a type system that is written out. *

The types part is organized by the type hierarchy, starting with the uima.cas.TOP type. There is an entry * for each type which has 1 or more serialized instances, and also for all supertypes of those types. * The entry is a JSON key-value pair "short-type-name" : {...}.

* *

The information for each type has 3 sections:

*
    *
  1. _subtypes - a JSON map of key-value pairs, keyed by the short type-name of * used subtypes of this type. If this type has * no used subtypes, this element is omitted. * The value is an instance of this structure, for that type.
  2. * *
  3. _id - the fully qualified UIMA type name
  4. * *
  5. _feature_types - a map with keys being specific features of the type * that need extra information about their contents, * and the value being that extra information.
  6. *
* * RANGE_IDs specify the type of the value of a feature. There are currently 2 kinds: * *
    *
  • "_byte_array" - indicates the string value should be decoded as a base64 binary encoded byte array
  • *
  • "{ "@featureRef" : "short_type_name" } - indicates the number or array of numbers * should be interpreted as a reference to a FS having this number (or array of numbers) * as its id(s). * 0 is interpreted as a null reference. * The type of the FS being referred to is of type "short_type_name" or a subtype.
  • *
* @throws IOException */ private void serializeJsonLdContext() throws IOException { jg.writeFieldName(CONTEXT_NAME); jg.writeStartObject(); if (typeSystemReference != null) { jch.writeNlJustBeforeNext(); jg.writeFieldName(TYPE_SYSTEM_NAME); jg.writeString(typeSystemReference); } collectUsedSubtypes(); jch.writeNlJustBeforeNext(); jg.writeFieldName(TYPES_NAME); jg.writeStartObject(); for (TypeImpl ti : cds.getSortedUsedTypes()) { jch.writeNlJustBeforeNext(); jg.writeFieldName(getSerializedTypeName(ti)); jg.writeStartObject(); if (isWithExpandedTypeNames) { jg.writeFieldName(ID_NAME); // form for using SerializedString jg.writeString(ti.getName()); } addJsonFeatContext(ti); if (isWithSubtypes) { addJsonSubtypes(ti); } jg.writeEndObject(); // end of one type } // write out contexts for types in the supertype chain which have no instances for (final TypeImpl ti : parentTypesWithNoInstances) { jch.writeNlJustBeforeNext(); jg.writeFieldName(getSerializedTypeName(ti)); jg.writeStartObject(); XmlElementName xe = cds.typeCode2namespaceNames[ti.getCode()]; if (isWithExpandedTypeNames) { jg.writeFieldName(ID_NAME); // form for using SerializedString jg.writeString(xe.nsUri); } addJsonFeatContext(ti); if (isWithSubtypes) { addJsonSubtypes(ti); } jg.writeEndObject(); // end of one type } jg.writeEndObject(); // end of _types jg.writeEndObject(); // end of _context } /** * _feature_types : { "featName" : "_ref" or "_byte_array, ... 
}
     *
     * @param type the type for which to generate the feature context info
     * @throws IOException if writing to the JSON generator fails
     */
    private void addJsonFeatContext(TypeImpl type) throws IOException {
      final FeatureImpl[] feats = type.getFeatureImpls();
      startedFeatureTypes = false;

      for (FeatureImpl feat : feats) {
        final int fsClass = CasSerializerSupport.classifyType(feat.getRangeImpl());
        final SerializedString featKind = featureTypeLabel(fsClass);
        if (featKind != null) {
          // the enclosing object is opened lazily, only when the first labeled feature shows up
          maybeDoStartFeatureTypes();
          jg.writeFieldName(getShortFeatureName(feat));
          jg.writeString(featKind);
        }
      }

      if (startedFeatureTypes) {
        jg.writeEndObject();
      }
    }

    /**
     * Writes the FEATURE_TYPES_NAME field name and opens its object, the first time this is
     * called after startedFeatureTypes was reset; later calls do nothing.
     *
     * @throws IOException if writing to the JSON generator fails
     */
    private void maybeDoStartFeatureTypes() throws IOException {
      if (!startedFeatureTypes) {
        jch.writeNlJustBeforeNext();
        jg.writeFieldName(FEATURE_TYPES_NAME);
        jg.writeStartObject();
        startedFeatureTypes = true;
      }
    }

    /**
     * @param feat a feature
     * @return the cached SerializedString for the feature's short name
     */
    private SerializedString getShortFeatureName(FeatureImpl feat) {
      return getSerializedString(feat.getShortName());
    }

    /**
     * Add subtype information for used types limited to used subtypes
     *
     * @param ti the type whose recorded subtypes (from mapType2Subtypes) are written
     * @throws IOException if writing to the JSON generator fails
     */
    private void addJsonSubtypes(TypeImpl ti) throws IOException {
      IntVector iv = mapType2Subtypes.get(ti.getCode());
      if (null != iv && iv.size() > 0) {
        jch.writeNlJustBeforeNext();
        jg.writeFieldName(SUB_TYPES_NAME);
        jg.writeStartArray();

        TypeSystemImpl tsi = ti.getTypeSystem();
        for (int typeCode : iv.toArray()) {
          jg.writeString(getSerializedTypeName(tsi.getTypeForCode(typeCode)));
        }
        jg.writeEndArray();
      }
    }

    /**
     * For every used type, walks up its supertype chain and records parent -> subtype entries in
     * mapType2Subtypes. Parents that are not themselves among the used types are remembered in
     * parentTypesWithNoInstances.
     */
    private void collectUsedSubtypes() {
      final TypeImpl[] tiArray = cds.getSortedUsedTypes();

      for (TypeImpl ti : tiArray) { // all used types
        int subtypeCode = ti.getCode();

        // loop up the super chain for this type,
        // add parent -> subtype entries (until try to add one that's already there)
        for (TypeImpl parent = (TypeImpl) ti.getSuperType();
             parent != null;
             parent = (TypeImpl) parent.getSuperType()) {

          final int parentCode = parent.getCode();

          // next comparator must match the one used for sorting the tiArray
          // https://issues.apache.org/jira/browse/UIMA-5171
          // if parent not contained in tiArray
          if (Arrays.binarySearch(tiArray, parent,
                  CasSerializerSupport.COMPARATOR_SHORT_TYPENAME) < 0) {
            if (!parentTypesWithNoInstances.contains(parent)) {
              parentTypesWithNoInstances.add(parent);
            }
          }

          boolean wasAdded = mapType2Subtypes.addSubtype(parentCode, subtypeCode);
          if (!wasAdded) {
            break; // this entry already exists, stop climbing
          }
          subtypeCode = parentCode;
        }
      }
    }

    /**
     * Returns the serialized (qName) form of a type's name, creating and caching the
     * XmlElementName for the type on first use.
     *
     * @param ti the type
     * @return the cached SerializedString for the type's qName
     */
    private SerializedString getSerializedTypeName(TypeImpl ti) {
      XmlElementName xe = cds.typeCode2namespaceNames[ti.getCode()];
      if (null == xe) {
        // happens for supertypes which have no instantiations
        String typeName = ti.getName();
        xe = uimaTypeName2XmiElementName(typeName);
        checkForNameCollision(xe);
        cds.typeCode2namespaceNames[ti.getCode()] = xe;
      }
      return getSerializedString(xe.qName);
    }

    /**
     * Looks up the SerializedString for a string in the serializedStrings cache, adding a new
     * entry when there is none.
     *
     * @param s the string to wrap
     * @return the cached (possibly newly created) SerializedString
     */
    private SerializedString getSerializedString(String s) {
      SerializedString ss = serializedStrings.get(s);
      if (ss == null) {
        ss = new SerializedString(s);
        serializedStrings.put(s, ss);
      }
      return ss;
    }

    /*
     * keep map from short type name to XmlElementName (full name, namespace, etc)
     * This map starts out empty
     * first use of type puts entry in
     * first use of type with different full name adds namespace to both
     */
    @Override
    protected void checkForNameCollision(XmlElementName xmlElementName) {
      XmlElementName xel = usedTypeName2XmlElementName.get(xmlElementName.localName);
      if (xel == null) {
        // first use of this short name
        usedTypeName2XmlElementName.put(xmlElementName.localName, xmlElementName);
        return;
      }

      // nsUri is the fully qualified name; when it matches, don't need name spaces yet,
      // or have already added them for this item
      if (!xel.nsUri.equals(xmlElementName.nsUri)) {
        addNameSpace(xel);
        addNameSpace(xmlElementName);
      }
    }

    /**
     * Writes whatever has to precede the features of one FS. Three layouts are handled: an
     * embedded FS, an FS keyed by its xmi id inside the REFERENCED_FSS_NAME object (indexId
     * set), or an FS placed in an array under its type name.
     *
     * @param fs the feature structure about to be written
     * @param typeCode the type code compared with lastEncodedTypeCode to detect a type change
     * @return the current value of indexId
     * @throws IOException if writing to the JSON generator fails
     */
    @Override
    protected boolean writeFsStart(TOP fs, int typeCode) throws IOException {
      if (isEmbedded) {
        if (!isEmbeddedFromFsFeature) {
          jch.writeNlJustBeforeNext(); // if from feature, already did nl
        }
        jg.writeStartObject();
      } else if (indexId) {
        if (!startedReferencedFSs) {
          jch.writeNlJustBeforeNext();
          jg.writeFieldName(REFERENCED_FSS_NAME);
          jg.writeStartObject();
          startedReferencedFSs = true;
        }
        jch.writeNlJustBeforeNext();
        jg.writeFieldName(cds.getXmiId(fs));
        jg.writeStartObject(); // start of feat : value
      } else { // fs's as arrays under typeName
        if (typeCode != lastEncodedTypeCode) {
          if (lastEncodedTypeCode != -1) {
            // close off previous Array
            jg.writeEndArray();
          }
          lastEncodedTypeCode = typeCode;
          jch.writeNlJustBeforeNext();
          jg.writeFieldName(getSerializedTypeName(fs._getTypeImpl()));
          jg.writeStartArray();
        }
        // if we're not going to write the actual FS here,
        // and are just going to write the ref,
        // skip the start object
        if (!cds.isDynamicMultiRef || !cds.multiRefFSs.contains(fs)) {
          jch.writeNlJustBeforeNext();
          jg.writeStartObject(); // start of feat : value
        }
      }
      return indexId;
    }

    /**
     * Writes a reference to an FS as the number returned by cds.getXmiIdAsInt.
     *
     * @param fs the referenced feature structure
     */
    @Override
    protected void writeFsRef(TOP fs) throws Exception {
      jg.writeNumber(cds.getXmiIdAsInt(fs));
    }

    /**
     * Writes the TYPE_NAME field with the serialized type name, but only when indexId or
     * isEmbedded is set.
     *
     * @param ti the type of the FS being written
     * @throws IOException if writing to the JSON generator fails
     */
    private void maybeWriteTypeFeat(TypeImpl ti) throws IOException {
      if (indexId || isEmbedded) {
        jg.writeFieldName(TYPE_NAME);
        jg.writeString(getSerializedTypeName(ti));
      }
    }

    @Override
    protected void writeFs(TOP fs, int typeCode) throws IOException {
      writeFsOrLists(fs, fs._getTypeImpl(), false);
    }

    @Override
    protected void writeListsAsIndividualFSs(TOP fs, int typeCode) throws IOException {
      writeFsOrLists(fs, fs._getTypeImpl(), true);
    }

    /**
     * Writes the optional type field followed by every feature of the FS, dispatching on the
     * class of each feature's range type.
     *
     * @param fs the feature structure whose features are written
     * @param ti the type supplying the feature list
     * @param isListAsFSs passed through to writeList for list-valued features
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeFsOrLists(TOP fs, TypeImpl ti, boolean isListAsFSs) throws IOException {
      final FeatureImpl[] feats = ti.getFeatureImpls();

      maybeWriteTypeFeat(ti);

      for (final FeatureImpl feat : feats) {

        if (cds.isFiltering) {
          // skip features that aren't in the target type system
          String fullFeatName = feat.getName();
          if (cds.filterTypeSystem_inner.getFeatureByFullName(fullFeatName) == null) {
            continue;
          }
        }

        final int featureClass = CasSerializerSupport.classifyType(feat.getRangeImpl());

        switch (featureClass) {

          case LowLevelCAS.TYPE_CLASS_BYTE:
            writeNumeric(feat, fs._getByteValueNc(feat));
            break;
          case LowLevelCAS.TYPE_CLASS_SHORT:
            writeNumeric(feat, fs._getShortValueNc(feat));
            break;
          case LowLevelCAS.TYPE_CLASS_INT:
            writeNumeric(feat, fs._getIntValueNc(feat));
            break;
          case LowLevelCAS.TYPE_CLASS_LONG:
            writeNumeric(feat, fs._getLongValueNc(feat));
            break;

          case LowLevelCAS.TYPE_CLASS_FS: {
            TOP ref = fs._getFeatureValueNc(feat);
            if (ref == null /* && isOmitDefaultValues */) {
              continue; // null refs are always omitted
            }
            writeFsOrRef(ref, feat); // writes nl before embedded fs
            break;
          }

          case LowLevelCAS.TYPE_CLASS_FLOAT: {
            final float floatVal = fs._getFloatValueNc(feat);
            if (floatVal == 0.F && isOmitDefaultValues) {
              continue;
            }
            jg.writeFieldName(getShortFeatureName(feat));
            jg.writeNumber(floatVal);
            break;
          }

          case LowLevelCAS.TYPE_CLASS_DOUBLE: {
            final double doubleVal = fs._getDoubleValueNc(feat);
            if (doubleVal == 0.0 && isOmitDefaultValues) {
              continue;
            }
            jg.writeFieldName(getShortFeatureName(feat));
            jg.writeNumber(doubleVal);
            break;
          }

          case LowLevelCAS.TYPE_CLASS_BOOLEAN:
            // booleans are written even when false
            jg.writeFieldName(getShortFeatureName(feat));
            jg.writeBoolean(fs._getBooleanValueNc(feat));
            break;

          case LowLevelCAS.TYPE_CLASS_STRING: {
            String s = fs._getStringValueNc(feat);
            if (s == null /* && isOmitDefaultValues */) {
              continue; // null strings are always omitted
            }
            jg.writeFieldName(getShortFeatureName(feat));
            jg.writeString(s);
            break;
          }

          case LowLevelCAS.TYPE_CLASS_INTARRAY:
          case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
          case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
          case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
          case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
          case LowLevelCAS.TYPE_CLASS_LONGARRAY:
          case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
          case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
          case LowLevelCAS.TYPE_CLASS_FSARRAY:
            writeArray(fs, feat, featureClass);
            break;

          case CasSerializerSupport.TYPE_CLASS_INTLIST:
          case CasSerializerSupport.TYPE_CLASS_FLOATLIST:
          case CasSerializerSupport.TYPE_CLASS_STRINGLIST:
          case CasSerializerSupport.TYPE_CLASS_FSLIST:
            writeList(fs, feat, featureClass, isListAsFSs);
            break;

          default:
            Misc.internalError();
        } // end of switch
      } // end of loop over all features
    }

    /**
     * Writes an integral feature as "name : number"; a zero value is skipped when
     * isOmitDefaultValues is set.
     *
     * @param fi the feature supplying the field name
     * @param v the value, widened to long
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeNumeric(FeatureImpl fi, long v) throws IOException {
      if (v == 0 && isOmitDefaultValues) {
        return;
      }
      jg.writeFieldName(getShortFeatureName(fi));
      jg.writeNumber(v);
    }

    /**
     * Writes an array-valued feature: either the array's id (when it counts as multiply
     * referenced) or its values inline. A null array value writes nothing.
     *
     * @param fs the FS holding the feature
     * @param fi the array-valued feature
     * @param featureClass the class of the feature's range type
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeArray(TOP fs, FeatureImpl fi, int featureClass) throws IOException {
      assert (fs != null);
      TOP array = fs._getFeatureValueNc(fi);
      if (array == null) {
        return;
      }

      jg.writeFieldName(getShortFeatureName(fi));
      if (isDynamicOrStaticMultiRef(fi, array)) {
        jg.writeNumber(cds.getXmiIdAsInt(array));
      } else {
        writeJsonArrayValues(array, featureClass);
      }
    }

    /**
     * Writes a list-valued feature: either the id of the first list node (when it counts as
     * multiply referenced) or the list's values inline. A null list value writes nothing.
     *
     * @param fs the FS holding the feature
     * @param fi the list-valued feature
     * @param featureClass the class of the feature's range type (not used here)
     * @param isListAsFSs forwarded to the multi-ref decision
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeList(TOP fs, FeatureImpl fi, int featureClass, boolean isListAsFSs)
        throws IOException {
      assert (fs != null);
      TOP list = fs._getFeatureValueNc(fi);
      if (list == null) {
        return;
      }

      jg.writeFieldName(getShortFeatureName(fi));
      if (isDynamicOrStaticMultiRef(fi, list, isListAsFSs)) {
        jg.writeNumber(cds.getXmiIdAsInt(list));
      } else {
        writeJsonListValues(list);
      }
    }

    /**
     * for arrays and lists,
     * recursively write one FS,
     *   as actual FS,
     *     if dynamic embedding and single ref
     *   OR, just write the reference id
     * If trying to write the null FS (due to filtering for instance), the number returned by
     * cds.getXmiIdAsInt for null is written (0, according to the original note on this method).
     *
     * @param fs the element to write, may be null
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeFsOrRef(TOP fs) throws IOException {
      if (fs == null || !cds.isDynamicMultiRef || cds.multiRefFSs.contains(fs)) {
        jg.writeNumber(cds.getXmiIdAsInt(fs));
      } else {
        isEmbeddedFromFsFeature = false;
        writeEmbeddedFs(fs);
      }
    }

    /**
     * Encodes an FS in place with isEmbedded switched on, restoring the previous flag value
     * afterwards. An IOException from the encoder is rethrown unchanged; any other exception is
     * wrapped in a RuntimeException.
     *
     * @param fs the FS to embed
     * @throws IOException if the encoder throws one
     */
    private void writeEmbeddedFs(TOP fs) throws IOException {
      boolean savedEmbedded = isEmbedded;
      try {
        isEmbedded = true;
        cds.encodeFS(fs);
      } catch (IOException e) {
        throw e;
      } catch (Exception e) {
        throw new RuntimeException(e);
      } finally {
        isEmbedded = savedEmbedded;
      }
    }

    /**
     * Writes an FS-valued feature, either as "name : id" or as "name : { embedded FS }".
     *
     * @param fs the referenced FS
     * @param fi the feature supplying the field name
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeFsOrRef(TOP fs, FeatureImpl fi) throws IOException {
      if (fs == null || !cds.isDynamicMultiRef || cds.multiRefFSs.contains(fs)) {
        jg.writeFieldName(getShortFeatureName(fi));
        jg.writeNumber(cds.getXmiIdAsInt(fs));
      } else {
        jch.writeNlJustBeforeNext();
        jg.writeFieldName(getShortFeatureName(fi));
        isEmbeddedFromFsFeature = true;
        // Use cases: can write embed, which has embed, which has non-embed
        //   once hit non-embed, this flag would be turned off,
        //   But it's only tested at the beginning of writeEmbeddedFs, so subsequent fields
        //   reset this
        // This flag only used to control new lines for embedded case
        writeEmbeddedFs(fs);
        isEmbeddedFromFsFeature = false; // restore default
      }
    }

    /**
     * Write FSArrays
     */
    @Override
    protected void writeArrays(TOP fs, int typeCode, int typeClass) throws IOException {
      maybeWriteTypeFeat(fs._getTypeImpl());

      jg.writeFieldName(COLLECTION_NAME);
      writeJsonArrayValues(fs, typeClass);
    }

    @Override
    protected void writeEndOfIndividualFs() throws IOException {
      jg.writeEndObject();
    }

    /**
     * Writes a set of values in a JSON array,
     * or null if the reference to the UIMA array is actually null.
     * 0 length arrays are written as [].
     * Byte arrays are the exception: they are written with the generator's binary
     * serialization instead of a JSON array.
     * Note: FSs can be embedded for FS Arrays.
     *
     * @param array the UIMA array, may be null
     * @param arrayType the class of the array (one of the LowLevelCAS.TYPE_CLASS_*ARRAY values)
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeJsonArrayValues(TOP array, int arrayType) throws IOException {
      if (array == null) {
        jg.writeNull();
        return;
      }

      cds.visited_not_yet_written.remove(array);
      final int array_size = ((CommonArrayFS) array).size();

      if (arrayType == LowLevelCAS.TYPE_CLASS_BYTEARRAY) {
        // special case for byte arrays:
        // serialize using standard JACKSON/JSON binary serialization
        jg.writeBinary(((ByteArray) array)._getTheArray());
        return;
      }

      jg.writeStartArray();

      switch (arrayType) {
        case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY: {
          boolean[] a = ((BooleanArray) array)._getTheArray();
          writeArrayElements(array_size, i -> jg.writeBoolean(a[i]));
          break;
        }
        case LowLevelCAS.TYPE_CLASS_SHORTARRAY: {
          short[] a = ((ShortArray) array)._getTheArray();
          writeArrayElements(array_size, i -> jg.writeNumber(a[i]));
          break;
        }
        case LowLevelCAS.TYPE_CLASS_INTARRAY: {
          int[] a = ((IntegerArray) array)._getTheArray();
          writeArrayElements(array_size, i -> jg.writeNumber(a[i]));
          break;
        }
        case LowLevelCAS.TYPE_CLASS_LONGARRAY: {
          long[] a = ((LongArray) array)._getTheArray();
          writeArrayElements(array_size, i -> jg.writeNumber(a[i]));
          break;
        }
        case LowLevelCAS.TYPE_CLASS_FLOATARRAY: {
          float[] a = ((FloatArray) array)._getTheArray();
          writeArrayElements(array_size, i -> jg.writeNumber(a[i]));
          break;
        }
        case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY: {
          double[] a = ((DoubleArray) array)._getTheArray();
          writeArrayElements(array_size, i -> jg.writeNumber(a[i]));
          break;
        }
        case LowLevelCAS.TYPE_CLASS_STRINGARRAY: {
          String[] a = ((StringArray) array)._getTheArray();
          writeArrayElements(array_size, i -> jg.writeString(a[i]));
          break;
        }
        case LowLevelCAS.TYPE_CLASS_FSARRAY:
          writeFSArray(array, array_size);
          break;

        default:
          Misc.internalError();
      } // end of switch

      jg.writeEndArray();
    }

    /**
     * Invokes the writer once for each index from 0 (inclusive) to size (exclusive).
     *
     * @param size the number of elements
     * @param ic the per-index writer; may throw IOException
     * @throws IOException if the writer throws one
     */
    private void writeArrayElements(final int size, IntConsumer_withIOException ic)
        throws IOException {
      for (int i = 0; i < size; i++) {
        ic.accept(i);
      }
    }

    /**
     * Writes the elements of an FS array (without the surrounding brackets). Null elements are
     * written as the recorded out-of-type-system id when one exists for that index, otherwise
     * as 0. When filtering, elements whose type is not in the target type system are treated as
     * null references.
     *
     * @param array the FS array
     * @param array_size the number of elements to write
     * @throws NumberFormatException if a recorded out-of-type-system id is not an integer
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeFSArray(TOP array, int array_size)
        throws NumberFormatException, IOException {
      FSArray fsArray = (FSArray) array;

      List<XmiArrayElement> ootsArrayElementsList = cds.sharedData == null
          ? null
          : cds.sharedData.getOutOfTypeSystemArrayElements(fsArray);
      int ootsIndex = 0;

      TOP[] fsItems = fsArray._getTheArray();

      for (int j = 0; j < array_size; j++) { // j used to id the oots things
        TOP fsItem = fsItems[j];

        if (fsItem == null) {
          // this null array element might have been a reference to an
          // out-of-typesystem FS, which, when deserialized, was replaced with NULL,
          // so check the ootsArrayElementsList
          boolean found = false;
          if (ootsArrayElementsList != null) {
            while (ootsIndex < ootsArrayElementsList.size()) {
              XmiArrayElement arel = ootsArrayElementsList.get(ootsIndex++);
              if (arel.index == j) {
                jg.writeNumber(Integer.parseInt(arel.xmiId));
                found = true;
                break;
              }
            }
          }
          if (!found) {
            jg.writeNumber(0);
          }

        } else { // not null FS ref
          if (cds.isFiltering) { // return as null any references to types not in target TS
            String typeName = fsItem._getTypeImpl().getName();
            if (cds.filterTypeSystem_inner.getType(typeName) == null) {
              fsItem = null;
            }
          }
          writeFsOrRef(fsItem); // allow embedding in array
        }
      } // end of loop over all refs in FS array
    }

    // a null ref is written as null
    // an empty list is written as []
    /**
     * Only called if no sharing of list nodes exists (except for non-dynamic case)
     * Only called for list nodes referred to by Feature value slots in some FS.
     *
     * Writes the head value of every non-empty node, in order, as one JSON array. Traversal
     * stops at an EmptyList node, at a null tail, or when a node is seen a second time.
     *
     * @param curNode the first node of the list; must not be null
     * @throws IOException if writing to the JSON generator fails
     */
    private void writeJsonListValues(TOP curNode) throws IOException {
      if (curNode == null) {
        Misc.internalError();
      }

      final PositiveIntSet visited = new PositiveIntSet_impl();

      jg.writeStartArray();
      FeatureStructure nextNode = null;

      while (curNode != null) {
        cds.visited_not_yet_written.remove(curNode);
        if (curNode instanceof EmptyList) {
          break; // would be the end element. a 0 is also treated as an end element
        }

        if (!visited.add(curNode._id())) {
          // loop detected, stop. no error report here, would be reported earlier during enqueue
          break;
        }

        if (curNode instanceof NonEmptyStringList) {
          NonEmptyStringList l = (NonEmptyStringList) curNode;
          jg.writeString(l.getHead());
          nextNode = l.getCommonTail();
        } else if (curNode instanceof NonEmptyFloatList) {
          NonEmptyFloatList l = (NonEmptyFloatList) curNode;
          jg.writeNumber(l.getHead());
          nextNode = l.getCommonTail();
        } else if (curNode instanceof NonEmptyFSList) {
          NonEmptyFSList l = (NonEmptyFSList) curNode;
          // write the element held by this node (its head), not the list node itself;
          // maybe embed
          writeFsOrRef(l.getHead());
          nextNode = l.getCommonTail();
        } else { // for ints
          NonEmptyIntegerList l = (NonEmptyIntegerList) curNode;
          jg.writeNumber(l.getHead());
          nextNode = l.getCommonTail();
        }

        curNode = (TOP) nextNode;
      }
      jg.writeEndArray();
    }

    /**
     * Return null or a string representing the type of the feature
     *
     * @param fsClass the class of the feature
     * @return _ref, _array, _byte_array, or null
     */
    private SerializedString featureTypeLabel(int fsClass) {
      switch (fsClass) {
        case LowLevelCAS.TYPE_CLASS_FS:
        case LowLevelCAS.TYPE_CLASS_FSARRAY:
        case CasSerializerSupport.TYPE_CLASS_FSLIST:
          return FEATURE_REFS_NAME;

        case LowLevelCAS.TYPE_CLASS_INTARRAY:
        case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
        case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
        case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
        case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
        case LowLevelCAS.TYPE_CLASS_LONGARRAY:
        case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
        case CasSerializerSupport.TYPE_CLASS_INTLIST:
        case CasSerializerSupport.TYPE_CLASS_FLOATLIST:
        case CasSerializerSupport.TYPE_CLASS_STRINGLIST:
          // we have refs only if the feature has
          // multipleReferencesAllowed = true
          return FEATURE_ARRAY_NAME;

        case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
          return FEATURE_BYTE_ARRAY_NAME;

        default: // for primitives
          return null;
      }
    }

    /**
     * Converts a UIMA-style dotted type name to the element name that should be used in the
     * serialization. The XMI element name consists of three parts - the Namespace URI, the Local
     * Name, and the QName (qualified name).
     *
     * @param uimaTypeName
     *          a UIMA-style dotted type name
     * @return a data structure holding the three components of the XML element name
     */
    @Override
    protected XmlElementName uimaTypeName2XmiElementName(String uimaTypeName) {
      // split uima type name into namespace and short name
      String shortName;
      final int lastDotIndex = uimaTypeName.lastIndexOf('.');
      if (lastDotIndex == -1) { // no namespace
        shortName = uimaTypeName;
      } else {
        shortName = uimaTypeName.substring(lastDotIndex + 1);
      }
      // convert short name to shared string, without interning, reduce GCs
      shortName = cds.getUniqueString(shortName);

      // use short name for qname until namespaces needed
      return new XmlElementName(uimaTypeName, shortName, shortName);
    }

    /**
     * Called to generate a new namespace prefix and add it to this element - due to a collision
     *
     * @param xmlElementName the element name whose qName gets a "prefix:" added, unless its
     *          qName already differs from its localName
     */
    @Override
    protected void addNameSpace(XmlElementName xmlElementName) {
      if (xmlElementName.qName.equals(xmlElementName.localName)) { // may have already had namespace added
        // split uima type name into namespace and short name
        String uimaTypeName = xmlElementName.nsUri;
        String shortName = xmlElementName.localName;
        final int lastDotIndex = uimaTypeName.lastIndexOf('.');

        // determine what namespace prefix to use
        String prefix = cds.getNameSpacePrefix(uimaTypeName, uimaTypeName, lastDotIndex);
        xmlElementName.qName = cds.getUniqueString(prefix + ':' + shortName);
      }
    }

    /**
     * @param fi the feature holding the value
     * @param fs the array referenced by the feature
     * @return in dynamic mode, whether fs is in cds.multiRefFSs; otherwise whether the feature
     *         is statically multi-ref
     */
    private boolean isDynamicOrStaticMultiRef(FeatureImpl fi, TOP fs) {
      return (!cds.isDynamicMultiRef)
          ? cds.isStaticMultiRef(fi)
          : cds.multiRefFSs.contains(fs);
    }

    /**
     * @param fi the feature holding the value
     * @param fs the first list node referenced by the feature
     * @param isListAsFSs in non-dynamic mode, forces a true result
     * @return in dynamic mode, whether fs is in cds.multiRefFSs; otherwise whether isListAsFSs
     *         is set or the feature is statically multi-ref
     */
    private boolean isDynamicOrStaticMultiRef(FeatureImpl fi, TOP fs, boolean isListAsFSs) {
      return (!cds.isDynamicMultiRef)
          ? (isListAsFSs || cds.isStaticMultiRef(fi))
          : cds.multiRefFSs.contains(fs);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy