All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.uima.json.JsonCasSerializer Maven / Gradle / Ivy

Go to download

JSON support for UIMA SDK. This module is deprecated. Use https://github.com/apache/uima-uimaj-io-jsoncas instead.

There is a newer version: 3.5.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.json;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Marker;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.ByteArrayFSImpl;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.CasSerializerSupport;
import org.apache.uima.cas.impl.CasSerializerSupport.CasDocSerializer;
import org.apache.uima.cas.impl.CasSerializerSupport.CasSerializerSupportSerialize;
import org.apache.uima.cas.impl.ListUtils;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.impl.MarkerImpl;
import org.apache.uima.cas.impl.TypeImpl;
import org.apache.uima.cas.impl.TypeSystemImpl;
import org.apache.uima.cas.impl.XmiSerializationSharedData;
import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
import org.apache.uima.internal.util.IntVector;
import org.apache.uima.internal.util.PositiveIntSet;
import org.apache.uima.internal.util.PositiveIntSet_impl;
import org.apache.uima.internal.util.XmlElementName;
import org.apache.uima.internal.util.rb_trees.RedBlackTree;
import org.apache.uima.json.impl.JsonContentHandlerJacksonWrapper;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.io.SerializedString;

/**
 * 

CAS serializer for JSON formats.

*

Writes a CAS in a JSON format.

* *

To use,

*
    *
  • create an instance of this class,
  • *
  • (optionally) configure the instance, and then
  • *
  • call serialize on the instance, optionally passing in additional parameters.
* *

After the first two steps, the serializer instance may be used for multiple calls (on multiple threads) to the third (serialize) step, provided all calls use the same configuration.

* *

There are "convenience" static serialize methods that do these three steps for common configurations.

* *

Parameters can be configured in this instance (I), and/or as part of the serialize(S) call.

* *

The parameters that can be configured are:

*
    *
  • (S) The CAS to serialize *
  • (S) where to put the output - an OutputStream, Writer, or File
  • *
  • (I,S) a type system - (default null) if supplied, it is used to "filter" the types and features that are serialized: only those that exist in the passed-in type system are included in the serialization
  • *
  • (I,S) a flag for prettyprinting - default false (no prettyprinting)
  • *
* *

For Json serialization, additional configuration from the Jackson implementation can be configured

* on 2 associated Jackson instances: *
  • JsonFactory
  • *
  • JsonGenerator
* using the standard Jackson methods on the associated JsonFactory instance; * see the Jackson JsonFactory and JsonGenerator javadocs for details. * *

These two Jackson objects are settable/gettable from an instance of this class. They are created if not supplied by the caller.

* *

Once this instance is configured, the serialize method is called to serialize a CAS to an output.

* *

Instances of this class must be used on only one thread while configuration is being done; afterwards, multiple threads may use the configured instance to call serialize.

*/ public class JsonCasSerializer { private static final Integer[] EMPTY_INTEGER_ARRAY = new Integer[0]; private static final SerializedString CONTEXT_NAME = new SerializedString("_context"); private static final SerializedString TYPE_SYSTEM_NAME = new SerializedString("_type_system"); private static final SerializedString TYPES_NAME = new SerializedString("_types"); private static final SerializedString ID_NAME = new SerializedString("_id"); private static final SerializedString SUB_TYPES_NAME = new SerializedString("_subtypes"); private static final SerializedString FEATURE_TYPES_NAME = new SerializedString("_feature_types"); private static final SerializedString FEATURE_REFS_NAME = new SerializedString("_ref"); private static final SerializedString FEATURE_ARRAY_NAME = new SerializedString("_array"); private static final SerializedString FEATURE_BYTE_ARRAY_NAME = new SerializedString("_byte_array"); private static final SerializedString REFERENCED_FSS_NAME = new SerializedString("_referenced_fss"); private static final SerializedString VIEWS_NAME = new SerializedString("_views"); private static final SerializedString TYPE_NAME = new SerializedString("_type"); private static final SerializedString COLLECTION_NAME = new SerializedString("_collection"); private static final SerializedString DELTA_CAS_NAME = new SerializedString("_delta_cas"); private static final SerializedString ADDED_MEMBERS_NAME = new SerializedString("added_members"); private static final SerializedString DELETED_MEMBERS_NAME = new SerializedString("deleted_members"); private static final SerializedString REINDEXED_MEMBERS_NAME = new SerializedString("reindexed_members"); /** *

The serialization can optionally include context information in addition to the feature structures.

* *

This context information is specified, per used-type.

* *

It can be further subdivided into 3 parts:

*
    *
  1. What their (used) subtypes are. This enables iterating over a type and all of its subtypes, e.g. an iterator over all "Annotations".
  2. *
  3. whether or not to include the map from short type names to their fully qualified equivalents.
  4. *
  5. Information to enable deserialization of some ambiguous values, depending on the range type of a feature *
* *

Some of these may be omitted, if not wanted. This enum allows specifying what to omit.

* */ public enum JsonContextFormat { omitContext, // omit the entire context omitSubtypes, omitExpandedTypeNames, } private final CasSerializerSupport css = new CasSerializerSupport(); // for testing CasSerializerSupport getCss() { return css; } private JsonFactory jsonFactory = null; private boolean isDynamicEmbedding = true; private boolean isWithContext = true; private boolean isWithSubtypes = true; private boolean isWithExpandedTypeNames = true; private boolean isOmit0Values = false; // https://issues.apache.org/jira/browse/UIMA-4117 private String typeSystemReference; /*********************************************** * C O N S T R U C T O R S * ***********************************************/ /** * Creates a new JsonCasSerializer */ public JsonCasSerializer() { } /************************************************** * J S O N * **************************************************/ /**************************************************** * Static JSON Serializer methods for convenience * * * * Note: these are named jsonSerialize * * The non-static methods * * are named serializeJson * ****************************************************/ /** * Serializes a CAS using JSON * * @param aCAS * CAS to serialize. * @param output * a File, OutputStream or Writer to which to write the XMI document * * @throws IOException if there was an IOException */ public static void jsonSerialize(CAS aCAS, Object output) throws IOException { jsonSerialize(aCAS, null, output, false, null, null); } /** * Serializes a CAS to an output (File, OutputStream, XMI stream, or Writer). * The supplied typesystem filters the output * * @param aCAS * CAS to serialize. * @param aTargetTypeSystem * type system used for filtering what gets serialized. Any types or features not in the * target type system will not be serialized. A null value indicates no filtering, that is, * that all types and features will be serialized. 
* @param output * output (File, OutputStream, or Writer) to which to write the JSON document * * @throws IOException if there was an IOException */ public static void jsonSerialize(CAS aCAS, TypeSystem aTargetTypeSystem, Object output) throws IOException { jsonSerialize(aCAS, aTargetTypeSystem, output, false, null, null); } /** * Serializes a Delta CAS to an output (File, Writer, or OutputStream). * This version of this method allows many options to be configured. * * * @param aCAS * CAS to serialize. * @param aTargetTypeSystem * type system to which the produced XMI will conform. Any types or features not in the * target type system will not be serialized. A null value indicates that all types and features * will be serialized. * @param output * File, Writer, or OutputStream to which to write the JSON document * @param aPrettyPrint * if true the JSON output will be formatted with newlines and indenting. If false it will be unformatted. * @param aMarker * an optional object used to determine which FeatureStructures and modifications were created after * the mark was set. Used to serialize a Delta CAS consisting of only new FSs and views and * preexisting FSs and Views that have been modified. If null, full serialization is done. * See the JavaDocs for {@link Marker} for details. 
* @param sharedData optional, used for delta serialization (not yet supported) * @throws IOException if there was an IOException */ public static void jsonSerialize(CAS aCAS, TypeSystem aTargetTypeSystem, Object output, boolean aPrettyPrint, Marker aMarker, XmiSerializationSharedData sharedData) throws IOException { JsonCasSerializer ser = new JsonCasSerializer(); ser.setFilterTypes((TypeSystemImpl)aTargetTypeSystem); ser.setPrettyPrint(aPrettyPrint); ser.serialize(aCAS, output, sharedData, aMarker); } /************************************************************************************* * Multi-step api * * 1) Create an instance of this class and use for configuration, specifying or defaulting * type system to use for filtering (default - no filtering) * prettyprinting (default - false) * * 1b) Do any additional wanted configuration on the instance of this class * instance.prettyPrint(true/false); * instance.useJsonFactory(factory) * instance.filterTypes(typeSystem) * instance.errorHandler(errorHandler) * instance.jsonFormat(EnumSet.of(x, y, z)) - default is none of the settings * * instance.getGenerator() to further configure the generator if the defaults are not what is wanted. * * 2) call its serializeJson method, passing in the CAS, and an output (Writer/Outputstream/File) * *************************************************************************************/ /** * Serialize a Cas to an Output, using configurations set on this instance. 
* Constructs a JsonContentHandlerJacksonWrapper, using configured JsonFactory and prettyprint settings if any * @param cas - the CAS to serialize * @param output - where the output goes, an OutputStream, Writer, or File * @throws IOException if there was an IOException */ public void serialize(CAS cas, Object output) throws IOException { serialize(cas, output, null, null); } public void serialize(CAS cas, Object output, XmiSerializationSharedData sharedData, Marker marker) throws IOException { JsonContentHandlerJacksonWrapper jch; try { jch = new JsonContentHandlerJacksonWrapper(jsonFactory, output, css.isFormattedOutput); } catch (SAXException e) { throw new IOException(e); } serialize(cas, jch, sharedData, marker); } /** * Serialize a Cas to an Output configured in the passed in JsonContentHandlerJacksonWrapper * Constructs a new CasDocSerializer instance to do the serialization, * configured using this class's Delta marker setting (if any) * @param cas The CAS to serialize * @param jch the configured content handler * @throws IOException if there was an IOException */ public void serialize(CAS cas, JsonContentHandlerJacksonWrapper jch) throws IOException { serialize(cas, jch, null, null); } public void serialize(CAS cas, JsonContentHandlerJacksonWrapper jch, XmiSerializationSharedData sharedData, Marker marker) throws IOException { JsonDocSerializer ser = new JsonDocSerializer(jch, ((CASImpl) cas).getBaseCAS(), sharedData, (MarkerImpl) marker); try { ser.cds.needNameSpaces = false; ser.cds.serialize(); } catch (Exception e) { if (e instanceof IOException) { throw (IOException) e; } throw new RuntimeException(e); } } /******************************************************** * Routines to set/reset configuration * ********************************************************/ /** * set or reset the pretty print flag (default is false) * @param pp true to do pretty printing of output * @return the original instance, possibly updated */ public JsonCasSerializer 
setPrettyPrint(boolean pp) { css.setPrettyPrint(pp); return this; } /** * set which JsonFactory instance to use; if null, a new instance is used * this can be used to preconfigure the JsonFactory instance * @param jsonFactory - * @return the original instance, possibly updated */ public JsonCasSerializer setJsonFactory(JsonFactory jsonFactory) { this.jsonFactory = jsonFactory; return this; } /** * pass in a type system to use for filtering what gets serialized; * only those types and features which are defined this type system are included. * @param ts the filter * @return the original instance, possibly updated */ public JsonCasSerializer setFilterTypes(TypeSystemImpl ts) { css.setFilterTypes(ts); return this; } public JsonCasSerializer setTypeSystemReference(String reference) { typeSystemReference = reference; return this; } // not done here, done on serialize call, because typically changes for each call // /** // * set the Marker to specify delta cas serialization // * forces static embedding mode // * @param m - the marker // * @return the original instance, possibly updated // */ // public JsonCasSerializer setDeltaCas(Marker m, XmiSerializationSharedData sharedData) { // css.setDeltaCas(m); // setStaticEmbedding(); // delta requires static embedding mode // return this; // } /** * set an error handler to receive information about errors * @param eh the error handler * @return the original instance, possibly updated */ public JsonCasSerializer setErrorHandler(ErrorHandler eh) { css.setErrorHandler(eh); return this; } /** * Sets static embedding mode * @return the original instance, possibly updated */ public JsonCasSerializer setStaticEmbedding() { isDynamicEmbedding = false; return this; } /** * sets which Json context format to use when serializing * @param format the format to use for the serialization * Specifying the context flag also specifies all 3 subflags * Specifying one of the subflags as true sets the context flag to true if it isn't already * 
@return the original instance, possibly updated */ public JsonCasSerializer setJsonContext(JsonContextFormat format) { switch (format) { case omitContext: isWithContext = false; isWithSubtypes = false; isWithExpandedTypeNames = false; break; case omitSubtypes: isWithSubtypes = false; break; case omitExpandedTypeNames: isWithExpandedTypeNames = false; break; } return this; } public JsonCasSerializer setOmit0Values(boolean omitDefaultValues) { isOmit0Values = omitDefaultValues; return this; } private static class MapType2Subtypes extends RedBlackTree { /** * * @param type main type * @param subtype subtype of main type * @return true if added, false if already was there */ boolean addSubtype(int type, int subtype) { IntVector iv = get(type); if (null == iv) { iv = new IntVector(); iv.add(subtype); put(type, iv); return true; } if (iv.contains(subtype)) { return false; } iv.add(subtype); return true; } } class JsonDocSerializer extends CasSerializerSupportSerialize { private final CasDocSerializer cds; private final JsonContentHandlerJacksonWrapper jch; private final JsonGenerator jg; private final String typeSystemReference; private final Map serializedStrings = new HashMap(); private final Map usedTypeName2XmlElementName; private final MapType2Subtypes mapType2Subtypes = new MapType2Subtypes(); private final IntVector parentTypesWithNoInstances = new IntVector(); private int lastEncodedTypeCode; private boolean startedReferencedFSs; private final boolean isOmitDefaultValues; private final boolean isWithContext; private final boolean isWithExpandedTypeNames; private final boolean isWithSubtypes; private boolean indexId; // true causes fs to be listed as "id" : { ...}, false as "type" : [ {...} private boolean isEmbedded = false; // true for embedded FSs, causes _type to be included private boolean isEmbeddedFromFsFeature; // used for NL formatting, false if embedded due to Array or List private boolean startedFeatureTypes; private JsonDocSerializer(ContentHandler ch, 
CASImpl cas, XmiSerializationSharedData sharedData, MarkerImpl marker) { cds = css.new CasDocSerializer(ch, cas, sharedData, marker, this, JsonCasSerializer.this.isDynamicEmbedding); this.isOmitDefaultValues = JsonCasSerializer.this.isOmit0Values; isWithExpandedTypeNames = JsonCasSerializer.this.isWithExpandedTypeNames; isWithSubtypes = JsonCasSerializer.this.isWithSubtypes; typeSystemReference = JsonCasSerializer.this.typeSystemReference; jch = (JsonContentHandlerJacksonWrapper) ch; jg = jch.getJsonGenerator(); isWithContext = JsonCasSerializer.this.isWithContext || isWithSubtypes || isWithExpandedTypeNames; usedTypeName2XmlElementName = new HashMap(cds.tsi.getNumberOfTypes()); } @Override protected void initializeNamespaces() { if (cds.sharedData != null && (null != cds.sharedData.getOutOfTypeSystemElements() || cds.sharedData.hasOutOfTypeSystemArrayElements())) { throw new UnsupportedOperationException("Can't do JSON serialization " + "if there are out-of-type-system elements," + " because there's no type information available (needed for _context)"); } } @Override protected void writeViews() throws Exception { if (!cds.isDelta) { return; } jch.writeNlJustBeforeNext(); jg.writeFieldName(DELTA_CAS_NAME); jg.writeStartObject(); cds.writeViewsCommons(); // encodes cas.sofaCount + 1 elements jg.writeEndObject(); // and end of views property } @Override protected void writeFeatureStructures(int elementCount /* not used */ ) throws Exception{ jch.withoutNl(); // set up prettyprint mode so this class controls it jg.writeStartObject(); // container for (maybe) context, fss (2 parts), and (maybe) delta view info if (isWithContext) { serializeJsonLdContext(); } jch.writeNlJustBeforeNext(); // write the reachable from indexes FS indexId = false; jg.writeFieldName(VIEWS_NAME); jg.writeStartObject(); final Integer[][] byViewByTypeFSs = sortByViewType(); for (int viewNbr = 1; viewNbr <= byViewByTypeFSs.length; viewNbr++) { // viewNbr starts at 1 lastEncodedTypeCode = -1; 
final Integer[] fssInView = byViewByTypeFSs[viewNbr - 1]; final int sofaAddr = cds.getSofaAddr(viewNbr); if (sofaAddr == 0 && fssInView.length == 0) { continue; // skip non-existent initial view with no sofa and no elements } jch.writeNlJustBeforeNext(); String viewName = (0 == sofaAddr) ? CAS.NAME_DEFAULT_SOFA : cds.cas.getStringValue(sofaAddr, cds.tsi.sofaIdFeatCode); jg.writeFieldName(viewName); // view namne jg.writeStartObject(); for (Integer fs : fssInView) { cds.encodeFS(fs); } if (lastEncodedTypeCode != -1) { jg.writeEndArray(); // of array of types under a fs } jg.writeEndObject(); } jg.writeEndObject(); // end of value for _views // write the non-embeddable referenced FSs indexId = true; startedReferencedFSs = false; cds.encodeQueued(); if (startedReferencedFSs) { jg.writeEndObject(); // of all referenced FSs } } @Override protected void writeEndOfSerialization() throws IOException { jg.writeEndObject(); // wrapper of _context and cas jg.flush(); } // sort the by-view by-type set // previously Serialized private Integer[][] sortByViewType() { final Integer[] [] r = new Integer[cds.indexedFSs.length] []; int i = 0; for (final IntVector fss : cds.indexedFSs) { final Integer[] viewFSs = r[i++] = (null == fss) ? EMPTY_INTEGER_ARRAY : new Integer[fss.size()]; for (int j = 0; j < viewFSs.length; j++) { viewFSs[j] = fss.get(j); } Arrays.sort(viewFSs, cds.sortFssByType); } return r; } @Override protected void writeView(int sofaAddr, int[] members) throws IOException { jch.writeNlJustBeforeNext(); String sofaXmiId = (0 == sofaAddr) ? 
"0" : cds.getXmiId(sofaAddr); jg.writeArrayFieldStart(sofaXmiId); writeViewMembers(members); //check for out-of-typesystem members if (cds.sharedData != null) { List ootsMembers = cds.sharedData.getOutOfTypeSystemViewMembers(sofaXmiId); jch.writeNlJustBeforeNext(); writeViewMembers(ootsMembers); } jg.writeEndArray(); } private void writeViewForDeltas(SerializedString kind, int[] deltaMembers) throws IOException { jg.writeFieldName(kind); jg.writeStartArray(); writeViewMembers(deltaMembers); jg.writeEndArray(); } @Override protected void writeView(int sofaAddr, int[] added, int[] deleted, int[] reindexed) throws IOException { jch.writeNlJustBeforeNext(); jg.writeFieldName(cds.getXmiId(sofaAddr)); jg.writeStartObject(); writeViewForDeltas(ADDED_MEMBERS_NAME, added); writeViewForDeltas(DELETED_MEMBERS_NAME, deleted); writeViewForDeltas(REINDEXED_MEMBERS_NAME, reindexed); jg.writeEndObject(); } private void writeViewMembers(int[] members) throws IOException { int nextBreak = CasSerializerSupport.PP_ELEMENTS; int i = 0; for (int member : members) { int xmiId = cds.getXmiIdAsInt(member); if (xmiId == 0) { continue; } if (i++ > nextBreak) { jch.writeNlJustBeforeNext(); nextBreak += CasSerializerSupport.PP_ELEMENTS; } jg.writeNumber(xmiId); } } /* * version for oots data */ private void writeViewMembers(List members) throws IOException { int nextBreak = CasSerializerSupport.PP_ELEMENTS; int i = 0; for (String xmiId : members) { if (null == xmiId || xmiId.length() == 0) { continue; } if (i++ > nextBreak) { jch.writeNlJustBeforeNext(); nextBreak += CasSerializerSupport.PP_ELEMENTS; } jg.writeNumber(Integer.parseInt(xmiId)); } } /** *

JSON: serialize context info

* *

The context has several parts. *

The typeSystemReference is an optional URI to a type system that is written out. *

The types part is organized by the type hierarchy, starting with the uima.cas.TOP type. There is an entry for each type which has 1 or more serialized instances, and also for all supertypes of those types. The entry is a JSON key-value pair "short-type-name" : {...}.

* *

The information for each type has 3 sections:

*
    *
  1. _subtypes - a JSON map of key-value pairs, keyed by the short type-name of * used subtypes of this type. If this type has * no used subtypes, this element is omitted. * The value is an instance of this structure, for that type.
  2. * *
  3. _id - the fully qualified UIMA type name
  4. * *
  5. _feature_types - a map with keys being specific features of the type that need extra information about their contents, and the value being that extra information.
  6. *
* * RANGE_IDs specify the type of the value of a feature. There are currently 2 kinds: * *
    *
  • "@featureByteArray" - indicates the string value should be decoded as a base64 binary encoded byte array
  • *
  • "{ "@featureRef" : "short_type_name" } - indicates the number or array of numbers * should be interpreted as a reference to a FS having this number (or array of numbers) * as its id(s). * 0 is interpreted as a null reference. * The type of the FS being referred to is of type "short_type_name" or a subtype.
  • *
* @throws IOException */ private void serializeJsonLdContext() throws IOException { jg.writeFieldName(CONTEXT_NAME); jg.writeStartObject(); if (typeSystemReference != null) { jch.writeNlJustBeforeNext(); jg.writeFieldName(TYPE_SYSTEM_NAME); jg.writeString(typeSystemReference); } collectUsedSubtypes(); jch.writeNlJustBeforeNext(); jg.writeFieldName(TYPES_NAME); jg.writeStartObject(); for (TypeImpl ti : cds.getSortedUsedTypes()) { jch.writeNlJustBeforeNext(); jg.writeFieldName(getSerializedTypeName(ti.getCode())); jg.writeStartObject(); if (isWithExpandedTypeNames) { jg.writeFieldName(ID_NAME); // form for using SerializedString jg.writeString(ti.getName()); } addJsonFeatContext(ti); if (isWithSubtypes) { addJsonSubtypes(ti); } jg.writeEndObject(); // end of one type } // write out contexts for types in the supertype chain which have no instances for (final int typeCode : parentTypesWithNoInstances.toArray()) { jch.writeNlJustBeforeNext(); jg.writeFieldName(getSerializedTypeName(typeCode)); jg.writeStartObject(); XmlElementName xe = cds.typeCode2namespaceNames[typeCode]; if (isWithExpandedTypeNames) { jg.writeFieldName(ID_NAME); // form for using SerializedString jg.writeString(xe.nsUri); } addJsonFeatContext(typeCode); if (isWithSubtypes) { addJsonSubtypes(typeCode); } jg.writeEndObject(); // end of one type } jg.writeEndObject(); // end of _types jg.writeEndObject(); // end of _context } /** * _feature_types : { "featName" : "_ref" or "_byte_array, ... 
} * * @param type the type for which to generate the feature context info * @throws IOException */ private void addJsonFeatContext(TypeImpl type) throws IOException { addJsonFeatContext(type.getCode()); } private void addJsonFeatContext(final int typeCode) throws IOException { final int[] feats = cds.tsi.ll_getAppropriateFeatures(typeCode); startedFeatureTypes = false; for (int featCode : feats) { final int fsClass = cds.classifyType(cds.tsi.range(featCode)); SerializedString featKind = featureTypeLabel(fsClass, featCode); if (null != featKind) { maybeDoStartFeatureTypes(); jg.writeFieldName(getShortFeatureName(featCode)); jg.writeString(featKind); } } if (startedFeatureTypes) { jg.writeEndObject(); } } private void maybeDoStartFeatureTypes() throws IOException { if (!startedFeatureTypes) { jch.writeNlJustBeforeNext(); jg.writeFieldName(FEATURE_TYPES_NAME); jg.writeStartObject(); startedFeatureTypes = true; } } private SerializedString getShortFeatureName(int featCode) { return getSerializedString(cds.tsi.ll_getFeatureForCode(featCode).getShortName()); } /** * Add subtype information for used types limited to used subtypes * @throws IOException */ private void addJsonSubtypes(TypeImpl ti) throws IOException { addJsonSubtypes(ti.getCode()); } private void addJsonSubtypes(int aTypeCode) throws IOException { IntVector iv = mapType2Subtypes.get(aTypeCode); if (null != iv && iv.size() > 0) { jch.writeNlJustBeforeNext(); jg.writeFieldName(SUB_TYPES_NAME); jg.writeStartArray(); for (int typeCode : iv.toArray()) { jg.writeString(getSerializedTypeName(typeCode)); } jg.writeEndArray(); } } private void collectUsedSubtypes() { final TypeImpl[] tiArray = cds.getSortedUsedTypes(); for (TypeImpl ti : tiArray) { // all used types int subtypeCode = ti.getCode(); // loop up the super chain for this type, // add parent -> subtype entries (until try to add one that's already there) for (TypeImpl parent = (TypeImpl) ti.getSuperType(); parent != null; parent = (TypeImpl) 
parent.getSuperType()) { final int parentCode = parent.getCode(); if (Arrays.binarySearch(tiArray, parent) < 0 ) { // if parentCode not contained in tiArray if (!parentTypesWithNoInstances.contains(parentCode)) { parentTypesWithNoInstances.add(parentCode); } } boolean wasAdded = mapType2Subtypes.addSubtype(parentCode, subtypeCode); if (!wasAdded) { break; } subtypeCode = parentCode; } } } private SerializedString getSerializedTypeName(int typeCode) { XmlElementName xe = cds.typeCode2namespaceNames[typeCode]; if (null == xe) { // happens for supertypes which have no instantiations String typeName = cds.tsi.ll_getTypeForCode(typeCode).getName(); xe = uimaTypeName2XmiElementName(typeName); cds.typeCode2namespaceNames[typeCode] = xe; } return getSerializedString(xe.qName); } private SerializedString getSerializedString(String s) { SerializedString ss = serializedStrings.get(s); if (ss == null) { ss = new SerializedString(s); serializedStrings.put(s, ss); } return ss; } @Override protected void checkForNameCollision(XmlElementName xmlElementName) { XmlElementName xel = usedTypeName2XmlElementName.get(xmlElementName.localName); if (xel != null) { if (xel.nsUri.equals(xmlElementName.nsUri)) { // nsUri is the fully qualified name return; // don't need name spaces yet } else { addNameSpace(xel); addNameSpace(xmlElementName); // usedTypeName2XmlElementName.clear(); // not needed anymore return; } } usedTypeName2XmlElementName.put(xmlElementName.localName, xmlElementName); return; } @Override protected boolean writeFsStart(int addr, int typeCode) throws IOException { if (isEmbedded) { if (!isEmbeddedFromFsFeature) { jch.writeNlJustBeforeNext(); // if from feature, already did nl } jg.writeStartObject(); } else if (indexId) { if (!startedReferencedFSs) { jch.writeNlJustBeforeNext(); jg.writeFieldName(REFERENCED_FSS_NAME); jg.writeStartObject(); startedReferencedFSs = true; } jch.writeNlJustBeforeNext(); jg.writeFieldName(cds.getXmiId(addr)); jg.writeStartObject(); // start of 
feat : value } else { // fs's as arrays under typeName if (typeCode != lastEncodedTypeCode) { if (lastEncodedTypeCode != -1) { // close off previous Array jg.writeEndArray(); } lastEncodedTypeCode = typeCode; jch.writeNlJustBeforeNext(); jg.writeFieldName(getSerializedTypeName(typeCode)); jg.writeStartArray(); } // if we're not going to write the actual FS here, // and are just going to write the ref, // skip the start object if (cds.multiRefFSs == null || !cds.multiRefFSs.contains(addr)) { jch.writeNlJustBeforeNext(); jg.writeStartObject(); // start of feat : value } } return indexId; } @Override protected void writeFsRef(int addr) throws Exception { jg.writeNumber(cds.getXmiIdAsInt(addr)); } // private void maybeWriteIdFeat(int addr) throws IOException { // if (!omitId) { // jg.writeFieldName(ID_NAME); // jg.writeNumber(cds.getXmiIdAsInt(addr)); // } // } private void maybeWriteTypeFeat(int typeCode) throws IOException { if (indexId || isEmbedded) { jg.writeFieldName(TYPE_NAME); jg.writeString(getSerializedTypeName(typeCode)); } } @Override protected void writeFs(int addr, int typeCode) throws IOException { writeFsOrLists(addr, typeCode, false); } @Override protected void writeListsAsIndividualFSs(int addr, int typeCode) throws IOException { writeFsOrLists(addr, typeCode, true); } private void writeFsOrLists(int addr, int typeCode, boolean isListAsFSs) throws IOException { final int[] feats = cds.tsi.ll_getAppropriateFeatures(typeCode); // maybeWriteIdFeat(addr); maybeWriteTypeFeat(typeCode); for (final int featCode : feats) { if (cds.isFiltering) { // skip features that aren't in the target type system String fullFeatName = cds.tsi.ll_getFeatureForCode(featCode).getName(); if (cds.filterTypeSystem.getFeatureByFullName(fullFeatName) == null) { continue; } } final int featAddr = addr + cds.cas.getFeatureOffset(featCode); final int featValRaw = cds.cas.getHeapValue(featAddr); final int featureClass = cds.classifyType(cds.tsi.range(featCode)); switch (featureClass) 
{ case LowLevelCAS.TYPE_CLASS_BYTE: case LowLevelCAS.TYPE_CLASS_SHORT: case LowLevelCAS.TYPE_CLASS_INT: if (featValRaw == 0 && isOmitDefaultValues) continue; jg.writeFieldName(getShortFeatureName(featCode)); jg.writeNumber(featValRaw); break; case LowLevelCAS.TYPE_CLASS_FS: if (featValRaw == 0/* && isOmitDefaultValues*/) continue; writeFsOrRef(featValRaw, featCode); // writes nl before embedded fs break; case LowLevelCAS.TYPE_CLASS_LONG: final long longVal = cds.cas.ll_getLongValue(featValRaw); if (longVal == 0L && isOmitDefaultValues) continue; jg.writeFieldName(getShortFeatureName(featCode)); jg.writeNumber(longVal); break; case LowLevelCAS.TYPE_CLASS_FLOAT: final float floatVal = CASImpl.int2float(featValRaw); if (floatVal == 0.F && isOmitDefaultValues) continue; jg.writeFieldName(getShortFeatureName(featCode)); jg.writeNumber(floatVal); break; case LowLevelCAS.TYPE_CLASS_DOUBLE: final double doubleVal = cds.cas.ll_getDoubleValue(addr, featCode); if (doubleVal == 0L && isOmitDefaultValues) continue; jg.writeFieldName(getShortFeatureName(featCode)); jg.writeNumber(doubleVal); break; case LowLevelCAS.TYPE_CLASS_BOOLEAN: jg.writeFieldName(getShortFeatureName(featCode)); jg.writeBoolean(cds.cas.ll_getBooleanValue(addr, featCode)); break; case LowLevelCAS.TYPE_CLASS_STRING: if (featValRaw == 0 /*&& isOmitDefaultValues*/) continue; jg.writeFieldName(getShortFeatureName(featCode)); jg.writeString(cds.cas.getStringForCode(featValRaw)); break; // all other fields (arrays, lists, fsRefs) can be null and are omitted if so default: if (featValRaw != CASImpl.NULL /*|| !isOmitDefaultValues*/) { jg.writeFieldName(getShortFeatureName(featCode)); if (featureClass == LowLevelCAS.TYPE_CLASS_INTARRAY || featureClass == LowLevelCAS.TYPE_CLASS_FLOATARRAY || featureClass == LowLevelCAS.TYPE_CLASS_BOOLEANARRAY || featureClass == LowLevelCAS.TYPE_CLASS_BYTEARRAY || featureClass == LowLevelCAS.TYPE_CLASS_SHORTARRAY || featureClass == LowLevelCAS.TYPE_CLASS_LONGARRAY || featureClass == 
LowLevelCAS.TYPE_CLASS_DOUBLEARRAY || featureClass == LowLevelCAS.TYPE_CLASS_STRINGARRAY || featureClass == LowLevelCAS.TYPE_CLASS_FSARRAY) { if (isDynamicOrStaticMultiRef(featCode, featValRaw)) { jg.writeNumber(cds.getXmiIdAsInt(featValRaw)); } else { writeJsonArrayValues(featValRaw, featureClass); } } else if (featureClass == CasSerializerSupport.TYPE_CLASS_INTLIST || featureClass == CasSerializerSupport.TYPE_CLASS_FLOATLIST || featureClass == CasSerializerSupport.TYPE_CLASS_STRINGLIST || featureClass == CasSerializerSupport.TYPE_CLASS_FSLIST) { if (isDynamicOrStaticMultiRef(featCode, featValRaw, isListAsFSs)) { jg.writeNumber(cds.getXmiIdAsInt(featValRaw)); } else { writeJsonListValues(featValRaw); } } else { // is error throw new RuntimeException("Invalid State, featureClass was "+ featureClass); } } // end of default case with non-null values } // end of switch } // end of loop over all features } /** * for arrays and lists, * recursively write one FS, * as actual FS, * if dynamic embedding and single ref * OR, just write the reference id * If trying to write the null FS (due to filtering for instance), write 0 * @param addr * @throws IOException */ private void writeFsOrRef(int addr) throws IOException { if (addr == 0 || null == cds.multiRefFSs || cds.multiRefFSs.contains(addr)) { jg.writeNumber(cds.getXmiIdAsInt(addr)); } else { isEmbeddedFromFsFeature = false; writeEmbeddedFs(addr); } } private void writeEmbeddedFs(int addr) throws IOException { boolean savedEmbedded = isEmbedded; try { isEmbedded = true; cds.encodeFS(addr); } catch (Exception e) { if (e instanceof IOException) { throw (IOException) e; } throw new RuntimeException(e); } finally { isEmbedded = savedEmbedded; } // embed } private void writeFsOrRef(int addr, int featCode) throws IOException { if (addr == 0 || null == cds.multiRefFSs || cds.multiRefFSs.contains(addr)) { jg.writeFieldName(getShortFeatureName(featCode)); jg.writeNumber(cds.getXmiIdAsInt(addr)); } else { 
jch.writeNlJustBeforeNext(); jg.writeFieldName(getShortFeatureName(featCode)); isEmbeddedFromFsFeature = true; // Use cases: can write embed, which has embed, which has non-embed // once hit non-embed, this flag would be turned off, // But it's only tested at the beginning of writeEmbeddedFs, so subsequent fields reset this // This flag only used to control new lines for embedded case writeEmbeddedFs(addr); isEmbeddedFromFsFeature = false; // restore default } } /** * Write FSArrays */ @Override protected void writeArrays(int addr, int typeCode, int typeClass) throws IOException { // maybeWriteIdFeat(addr); maybeWriteTypeFeat(typeCode); jg.writeFieldName(COLLECTION_NAME); writeJsonArrayValues(addr, typeClass); } @Override protected void writeEndOfIndividualFs() throws IOException { jg.writeEndObject(); } // writes a set of values in a JSON array // or null if the reference to the UIMA array is actually null // 0 length arrays are written as [] // Note: FSs can be embedded for FS Arrays private void writeJsonArrayValues(int addr, int arrayType) throws IOException { if (addr == CASImpl.NULL) { jg.writeNull(); return; } cds.visited_not_yet_written.remove(addr); final int array_size = cds.cas.ll_getArraySize(addr); if (arrayType == LowLevelCAS.TYPE_CLASS_BYTEARRAY) { // special case for byte arrays: // serialize using standard JACKSON/JSON binary serialization // (doing extra copy to avoid figuring out the impl details) ByteArrayFS byteArrayFS = new ByteArrayFSImpl(addr, cds.cas); int length = byteArrayFS.size(); byte[] byteArray = new byte[length]; byteArrayFS.copyToArray(0, byteArray, 0, length); jg.writeBinary(byteArray); } else { jg.writeStartArray(); int pos = cds.cas.getArrayStartAddress(addr); if (arrayType == LowLevelCAS.TYPE_CLASS_FSARRAY) { List ootsArrayElementsList = cds.sharedData == null ? 
null : cds.sharedData.getOutOfTypeSystemArrayElements(addr); int ootsIndex = 0; for (int j = 0; j < array_size; j++) { // j used to id the oots things int heapValue = cds.cas.getHeapValue(pos++); if (heapValue == CASImpl.NULL) { // this null array element might have been a reference to an // out-of-typesystem FS, which, when deserialized, was replaced with NULL, // so check the ootsArrayElementsList boolean found = false; if (ootsArrayElementsList != null) { while (ootsIndex < ootsArrayElementsList.size()) { XmiArrayElement arel = ootsArrayElementsList.get(ootsIndex++); if (arel.index == j) { jg.writeNumber(Integer.parseInt(arel.xmiId)); found = true; break; } } } if (!found) { jg.writeNumber(0); } // else, not null FS ref } else { if (cds.isFiltering) { // return as null any references to types not in target TS String typeName = cds.tsi.ll_getTypeForCode(cds.cas.getHeapValue(addr)).getName(); if (cds.filterTypeSystem.getType(typeName) == null) { heapValue = CASImpl.NULL; } } writeFsOrRef(heapValue); // allow embedding in array } } // end of loop over all refs in FS array } else { for (int i = 0; i < array_size; i++) { if (arrayType == LowLevelCAS.TYPE_CLASS_BOOLEANARRAY) { jg.writeBoolean(cds.cas.ll_getBooleanArrayValue(addr, i)); } else if (arrayType == LowLevelCAS.TYPE_CLASS_STRINGARRAY) { jg.writeString(cds.cas.ll_getStringArrayValue(addr, i)); } else if (arrayType == LowLevelCAS.TYPE_CLASS_SHORTARRAY) { jg.writeNumber(cds.cas.ll_getShortArrayValue(addr, i)); } else if (arrayType == LowLevelCAS.TYPE_CLASS_INTARRAY) { jg.writeNumber(cds.cas.ll_getIntArrayValue(addr, i)); } else if (arrayType == LowLevelCAS.TYPE_CLASS_LONGARRAY) { jg.writeNumber(cds.cas.ll_getLongArrayValue(addr, i)); } else if (arrayType == LowLevelCAS.TYPE_CLASS_FLOATARRAY) { jg.writeNumber(cds.cas.ll_getFloatArrayValue(addr, i)); } else { jg.writeNumber(cds.cas.ll_getDoubleArrayValue(addr, i)); } } } jg.writeEndArray(); } } // a null ref is written as null // an empty list is written as [] /** 
* Only called if no sharing of list nodes exists (except for non-dynamic case) * Only called for list nodes referred to by Feature value slots in some FS. * @param curNode the address of the start of the list * @throws IOException */ private void writeJsonListValues(int curNode) throws IOException { if (curNode == CASImpl.NULL) { throw new RuntimeException("never happen"); } final ListUtils listUtils = cds.listUtils; final int startNodeType = cds.cas.getHeapValue(curNode); int headFeat = listUtils.getHeadFeatCode(startNodeType); int tailFeat = listUtils.getTailFeatCode(startNodeType); int neListType = listUtils.getNeListType(startNodeType); // non-empty final PositiveIntSet visited = new PositiveIntSet_impl(); jg.writeStartArray(); while (curNode != CASImpl.NULL) { cds.visited_not_yet_written.remove(curNode); final int curNodeType = cds.cas.getHeapValue(curNode); if (curNodeType != neListType) { // if not "non-empty" break; // would be the end element. a 0 is also treated as an end element } if (!visited.add(curNode)) { break; // loop detected, stop. 
no error report here, would be reported earlier during enqueue } final int val = cds.cas.getHeapValue(curNode + cds.cas.getFeatureOffset(headFeat)); if (curNodeType == listUtils.neStringListType) { jg.writeString(cds.cas.getStringForCode(val)); } else if (curNodeType == listUtils.neFloatListType) { jg.writeNumber(CASImpl.int2float(val)); } else if (curNodeType == listUtils.neFsListType) { writeFsOrRef(val); // maybe embed } else { // for ints jg.writeNumber(val); } curNode = cds.cas.getHeapValue(curNode + cds.cas.getFeatureOffset(tailFeat)); } jg.writeEndArray(); } /** * Return null or a string representing the type of the feature * * * @param fsClass the class of the feature * @param featCode the feature code * @return _ref, _array, _byte_array, or null */ private SerializedString featureTypeLabel(int fsClass, int featCode) { switch (fsClass) { case LowLevelCAS.TYPE_CLASS_FS: case LowLevelCAS.TYPE_CLASS_FSARRAY: case CasSerializerSupport.TYPE_CLASS_FSLIST: return FEATURE_REFS_NAME; case LowLevelCAS.TYPE_CLASS_INTARRAY: case LowLevelCAS.TYPE_CLASS_FLOATARRAY: case LowLevelCAS.TYPE_CLASS_STRINGARRAY: case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY: case LowLevelCAS.TYPE_CLASS_SHORTARRAY: case LowLevelCAS.TYPE_CLASS_LONGARRAY: case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY: case CasSerializerSupport.TYPE_CLASS_INTLIST: case CasSerializerSupport.TYPE_CLASS_FLOATLIST: case CasSerializerSupport.TYPE_CLASS_STRINGLIST: // we have refs only if the feature has // multipleReferencesAllowed = true return FEATURE_ARRAY_NAME; case LowLevelCAS.TYPE_CLASS_BYTEARRAY: return FEATURE_BYTE_ARRAY_NAME; default: // for primitives return null; } } /** * Converts a UIMA-style dotted type name to the element name that should be used in the * serialization. The XMI element name consists of three parts - the Namespace URI, the Local * Name, and the QName (qualified name). 
*
     * @param uimaTypeName
     *          a UIMA-style dotted type name
     * @return a data structure holding the three components of the XML element name
     */
    @Override
    protected XmlElementName uimaTypeName2XmiElementName(String uimaTypeName) {
      // the short name is whatever follows the last '.'; without a '.', there is no
      // namespace and the whole name is the short name
      final int lastDot = uimaTypeName.lastIndexOf('.');
      final String localPart = (lastDot == -1) ? uimaTypeName : uimaTypeName.substring(lastDot + 1);

      // convert short name to shared string, without interning, reduce GCs
      final String sharedShortName = cds.getUniqueString(localPart);

      // use short name for qname until namespaces needed
      return new XmlElementName(uimaTypeName, sharedShortName, sharedShortName);
    }

    /**
     * Called to generate a new namespace prefix and add it to this element - due to a collision.
     * Does nothing when the qName already differs from the local name (a namespace
     * may have already been added).
     *
     * @param xmlElementName the element name whose qName is to be given a prefix
     */
    @Override
    protected void addNameSpace(XmlElementName xmlElementName) {
      if (!xmlElementName.qName.equals(xmlElementName.localName)) {
        return; // may have already had namespace added
      }

      // the nsUri slot holds the full UIMA type name; split it at the last '.'
      final String fullTypeName = xmlElementName.nsUri;
      final String localPart = xmlElementName.localName;
      final int lastDot = fullTypeName.lastIndexOf('.');

      // determine what namespace prefix to use
      final String nsPrefix = cds.getNameSpacePrefix(fullTypeName, fullTypeName, lastDot);
      xmlElementName.qName = cds.getUniqueString(nsPrefix + ':' + localPart);
    }

    /**
     * Decides whether an array value is written as a reference id rather than inline.
     * Same as the three-argument form with {@code isListAsFSs} false.
     *
     * @param featCode the code of the feature holding the value
     * @param addr the address of the array
     * @return true if the value should be written as a reference
     */
    private boolean isDynamicOrStaticMultiRef(int featCode, int addr) {
      return isDynamicOrStaticMultiRef(featCode, addr, false);
    }

    /**
     * Decides whether an array or list value is written as a reference id rather than inline.
     * With no dynamic multi-ref set ({@code cds.multiRefFSs == null}) the answer is
     * {@code isListAsFSs} or the static per-feature setting; otherwise it is whether
     * the set contains {@code addr}.
     *
     * @param featCode the code of the feature holding the value
     * @param addr the address of the array or list
     * @param isListAsFSs forces a reference in the static (non-dynamic) case
     * @return true if the value should be written as a reference
     */
    private boolean isDynamicOrStaticMultiRef(int featCode, int addr, boolean isListAsFSs) {
      if (cds.multiRefFSs != null) {
        return cds.multiRefFSs.contains(addr);
      }
      return isListAsFSs || cds.isStaticMultiRef(featCode);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy