All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.basistech.rosette.dm.ConvertFromPreAdm11 Maven / Gradle / Ivy

There is a newer version: 3.0.3
Show newest version
/*
* Copyright 2016 Basis Technology Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.basistech.rosette.dm;

import com.basistech.rosette.RosetteRuntimeException;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;

import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

/**
 * Utility class with methods for conversion of the data from
 * the pre-1.1.0 data model to the 1.1.0 data model.
 */
@SuppressWarnings("deprecation")
final class ConvertFromPreAdm11 {

    private ConvertFromPreAdm11() {
        //
    }

    static Mention convertMention(EntityMention em) {
        Mention.Builder mentionBuilder = new Mention.Builder(em.getStartOffset(), em.getEndOffset());
        if (em.getConfidence() != null) {
            mentionBuilder.confidence(em.getConfidence());
        }
        if (em.getLinkingConfidence() != null) {
            mentionBuilder.linkingConfidence(em.getLinkingConfidence());
        }
        if (em.getFlags() != null && em.getFlags() != 0) {
            mentionBuilder.extendedProperty("oldFlags", em.getFlags());
        }
        if (em.getCoreferenceChainId() != null) {
            mentionBuilder.extendedProperty("oldCoreferenceChainId", em.getCoreferenceChainId());
        }
        if (em.getNormalized() != null) {
            mentionBuilder.normalized(em.getNormalized());
        }
        if (em.getSource() != null) {
            mentionBuilder.source(em.getSource());
        }
        if (em.getSubsource() != null) {
            mentionBuilder.subsource(em.getSubsource());
        }

        if (em.getExtendedProperties() != null && !em.getExtendedProperties().isEmpty()) {
            for (Map.Entry me : em.getExtendedProperties().entrySet()) {
                mentionBuilder.extendedProperty(me.getKey(), me.getValue());
            }
        }

        mentionBuilder.extendedProperty("old-entity-type", em.getEntityType());

        return mentionBuilder.build();
    }

    /*
     * Each mention can in one of two states:
     * 1: attached to a 'ResolvedEntity'
     * 2: part of a coref chain; perhaps a singleton.
     *
     * In the second case, we need the same processing as we have
     * below in unresolved conversion; how to factor?
     */


    // there are at least indoc chains
    static void doResolvedConversion(ListAttribute newResolved,
                                     ListAttribute oldMentions,
                                     ListAttribute oldResolved,
                                     ImmutableMap.Builder builder) {

        if (oldMentions == null) {
            doNoMentionConversion(oldResolved, builder);
            return;
        }

        int maxChainId = -1;
        for (EntityMention oldMention : oldMentions) {
            if (oldMention.getCoreferenceChainId() != null) {
                maxChainId = Math.max(maxChainId, oldMention.getCoreferenceChainId());
            }
        }
        maxChainId = Math.max(maxChainId, oldMentions.size());

        ResolvedEntity[] resolvedByChainId = new ResolvedEntity[maxChainId + 1];
        if (oldResolved != null) {
            for (ResolvedEntity resolvedEntity : oldResolved) {
                if (resolvedEntity.getCoreferenceChainId() != null) {
                    resolvedByChainId[resolvedEntity.getCoreferenceChainId()] = resolvedEntity;
                } else {
                    throw new RosetteRuntimeException("Resolved entity with no coref chain id.");
                }
            }
        }

        // Note that indoc chain ids can be sparse, or altogether absent.
        // Absent is important, as it means that no indoc happened.
        // If any indoc happened, all the mentions have indoc chains.
        boolean indocPresent = !oldMentions.isEmpty() && oldMentions.get(0).getCoreferenceChainId() != null;


        int[] newIndices = new int[maxChainId + 1];
        //chain ids cannot be larger than the count of mentions.
        int[] chainToIndex = new int[maxChainId + 1];
        Arrays.fill(chainToIndex, -1);
        int newEntityCount = 0;

        for (int oldIndex = 0; oldIndex < oldMentions.size(); oldIndex++) {
            EntityMention em = oldMentions.get(oldIndex);
            if (em.getCoreferenceChainId() != null && em.getCoreferenceChainId() != -1) {
                if (chainToIndex[em.getCoreferenceChainId()] == -1) {
                    chainToIndex[em.getCoreferenceChainId()] = newEntityCount++;
                }
                newIndices[oldIndex] = chainToIndex[em.getCoreferenceChainId()];
            } else {
                newIndices[oldIndex] = newEntityCount++;
            }
        }

        List> mentionsByEntities = Lists.newArrayListWithExpectedSize(newEntityCount);
        for (int x = 0; x < newEntityCount; x++) {
            mentionsByEntities.add(Lists.newArrayList());
        }

        /* For each coref chain, the head is the entity whose index in the old mentions
         * is equal to the chain id.
         */
        int[] heads = new int[newEntityCount];

        for (int oldIndex = 0; oldIndex < oldMentions.size(); oldIndex++) {
            EntityMention em = oldMentions.get(oldIndex);
            int newIndex = newIndices[oldIndex];
            mentionsByEntities.get(newIndex).add(em);
            if (em.getCoreferenceChainId() != null && em.getCoreferenceChainId() == oldIndex) {
                heads[newIndex] = mentionsByEntities.get(newIndex).size() - 1;
            }
        }


        ListAttribute.Builder elBuilder = buildEntities(newResolved, oldMentions, resolvedByChainId, indocPresent, mentionsByEntities, heads);

        builder.put(AttributeKey.ENTITY.key(), elBuilder.build());
    }

    // We can have old resolved entities without mentions.
    private static void doNoMentionConversion(ListAttribute oldResolved, ImmutableMap.Builder builder) {
        ListAttribute.Builder elBuilder = new ListAttribute.Builder<>(Entity.class);
        for (ResolvedEntity resolvedEntity : oldResolved) {
            Entity.Builder enBuilder = new Entity.Builder();
            convertOneEntity(enBuilder, resolvedEntity, null);
            elBuilder.add(enBuilder.build());
        }
        builder.put(AttributeKey.ENTITY.key(), elBuilder.build());
    }

    private static ListAttribute.Builder buildEntities(ListAttribute newResolved, ListAttribute oldMentions,
                                                               ResolvedEntity[] resolvedByChainId,
                                                               boolean indocPresent,
                                                               List> mentionsByEntities,
                                                               int[] heads) {
        // Since we need to sort, put them in an ordinary list for a start.
        List entities = Lists.newArrayList();
        if (newResolved != null) {
            entities.addAll(newResolved);
        }

        // now we can just walk mentionsByEntities to build the results.
        int newIndex = 0;
        for (List entityMentions : mentionsByEntities) {
            Entity.Builder enBuilder = new Entity.Builder();
            String type = null;
            List mentions = Lists.newArrayList();
            for (int x = 0; x < entityMentions.size(); x++) {
                EntityMention em = entityMentions.get(x);
                if (x == heads[newIndex]) {
                    type = em.getEntityType();
                }
                Mention mention = convertMention(em);
                mentions.add(mention);

            }

            Collections.sort(mentions, new Comparator() {
                @Override
                public int compare(Mention o1, Mention o2) {
                    if (o1.getStartOffset() == o2.getStartOffset()) {
                        return o1.getEndOffset() - o2.getEndOffset();
                    } else {
                        return o1.getStartOffset() - o2.getStartOffset();
                    }
                }
            });

            for (Mention mention : mentions) {
                enBuilder.mention(mention);
            }
            enBuilder.type(type);

            EntityMention entityMention = entityMentions.get(0);
            ResolvedEntity resolvedEntity = null;
            if (entityMention.getCoreferenceChainId() != null && entityMention.getCoreferenceChainId() != -1) {
                resolvedEntity = resolvedByChainId[entityMention.getCoreferenceChainId()];
            }

            if (resolvedEntity != null) {
                convertOneEntity(enBuilder, resolvedEntity, entityMention.getCoreferenceChainId());
            } else if (entityMention.getCoreferenceChainId() != null) {
                // no resolved entity, but we still need a coref chain.
                enBuilder.extendedProperty("oldCoreferenceChainId", entityMention.getCoreferenceChainId());
                // Also implement the 'T' entity id convention -- if we have no Qid from a linked entity,
                // generate one from the coref chain id.
                enBuilder.entityId(String.format("T%d", entityMention.getCoreferenceChainId()));
            }

            if (indocPresent) {
                enBuilder.headMentionIndex(heads[newIndex]);
            }

            entities.add(enBuilder.build());
            newIndex++;
        }

        if (indocPresent) {
            Collections.sort(entities, new Comparator() {
                @Override
                public int compare(Entity o1, Entity o2) {
                    if (o1.getMentions() == null || o1.getMentions().size() == 0) {
                        if (o1.getMentions() == null || o2.getMentions().size() == 0) {
                            return 0;
                        } else {
                            return -1;
                        }
                    }

                    Mention m1 = o1.getMentions().get(0);
                    Mention m2 = o2.getMentions().get(0);

                    if (m1.getStartOffset() == m2.getStartOffset()) {
                        return m1.getEndOffset() - m2.getEndOffset();
                    } else {
                        return m1.getStartOffset() - m2.getStartOffset();
                    }
                }
            });
        }

        ListAttribute.Builder elBuilder = new ListAttribute.Builder<>(Entity.class);
        for (Entity entity : entities) {
            elBuilder.add(entity);
        }


        if (oldMentions.getExtendedProperties() != null) {
            for (Map.Entry me : oldMentions.getExtendedProperties().entrySet()) {
                elBuilder.extendedProperty("mention." + me.getKey(),
                        me.getValue());
            }
        }

        if (newResolved != null && newResolved.getExtendedProperties().size() != 0) {
            for (Map.Entry me : newResolved.getExtendedProperties().entrySet()) {
                elBuilder.extendedProperty(me.getKey(),  me.getValue());
            }
        }

        return elBuilder;
    }

    private static void convertOneEntity(Entity.Builder enBuilder, ResolvedEntity resolvedEntity, Integer indocChainId) {
        enBuilder.entityId(resolvedEntity.getEntityId());
        enBuilder.confidence(resolvedEntity.getConfidence());
        if (resolvedEntity.getSentiment() != null) {
            // It's a list in the new code, a single item in the old code.
            enBuilder.sentiment(resolvedEntity.getSentiment());
        }
        if (resolvedEntity.getExtendedProperties() != null) {
            for (Map.Entry me : resolvedEntity.getExtendedProperties().entrySet()) {
                enBuilder.extendedProperty(me.getKey(), me.getValue());
            }
        }
        if (resolvedEntity.getCoreferenceChainId() != null) {
            enBuilder.extendedProperty("oldCoreferenceChainId", resolvedEntity.getCoreferenceChainId());
            if (resolvedEntity.getEntityId() == null && indocChainId != null) {
                enBuilder.entityId(String.format("T%d", indocChainId));
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy