All downloads are free. Search and download functionality uses the official Maven repository.

edu.jhu.hlt.concrete.miscommunication.tokenized.CachedTokenizationCommunication Maven / Gradle / Ivy

There is a newer version: 4.15.0
Show newest version
/*
 * Copyright 2012-2015 Johns Hopkins University HLTCOE. All rights reserved.
 * See LICENSE in the project root directory.
 */
package edu.jhu.hlt.concrete.miscommunication.tokenized;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import edu.jhu.hlt.concrete.Communication;
import edu.jhu.hlt.concrete.Section;
import edu.jhu.hlt.concrete.Sentence;
import edu.jhu.hlt.concrete.Token;
import edu.jhu.hlt.concrete.Tokenization;
import edu.jhu.hlt.concrete.UUID;
import edu.jhu.hlt.concrete.miscommunication.MiscommunicationException;
import edu.jhu.hlt.concrete.miscommunication.sectioned.MappedSectionCommunication;
import edu.jhu.hlt.concrete.miscommunication.sentenced.CachedSentencedCommunication;
import edu.jhu.hlt.concrete.miscommunication.sentenced.MappedSentenceCommunication;

/**
 * Aggressively cached implementation of {@link MappedTokenizedCommunication},
 * {@link MappedSentenceCommunication}, and {@link MappedSectionCommunication}.
 * 

* Assumes that each {@link Sentence} object has at least one {@link Tokenization} object. If not, will throw a * {@link MiscommunicationException}. */ public class CachedTokenizationCommunication implements MappedTokenizedCommunication, MappedSentenceCommunication, MappedSectionCommunication { private final CachedSentencedCommunication cpy; private final Map tokenizationIdToTokenizationMap; private final Map> tokenizationIdToTokenIdxToTokenMap; public CachedTokenizationCommunication(final Communication orig) throws MiscommunicationException { this.cpy = new CachedSentencedCommunication(orig); Optional bs = this.cpy.getSentences().stream() .filter(s -> !validPredicate(s)) .findAny(); if (bs.isPresent()) throw new MiscommunicationException("At least one Sentence did not have a Tokenization (UUID = " + bs.get().getUuid().getUuidString() + ")."); final Map toRet = new LinkedHashMap<>(); final Map> uuidToIdxToTokenMap = new LinkedHashMap<>(); List stList = new ArrayList<>(this.cpy.getSentences()); for (Sentence st : stList) { Tokenization tok = st.getTokenization(); UUID tId = tok.getUuid(); toRet.put(tId, tok); final Map idToTokenMap = new LinkedHashMap<>(); if (tok.isSetTokenList()) for (Token t: tok.getTokenList().getTokenList()) { idToTokenMap.put(t.getTokenIndex(), t); uuidToIdxToTokenMap.put(tId, idToTokenMap); } } this.tokenizationIdToTokenizationMap = toRet; this.tokenizationIdToTokenIdxToTokenMap = uuidToIdxToTokenMap; } private final boolean validPredicate(final Sentence s) { return s.isSetTokenization(); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.sentenced.SentencedCommunication#getSentences() */ @Override public List getSentences() { return this.cpy.getSentences(); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.sectioned.SectionedCommunication#getSections() */ @Override public List
getSections() { return this.cpy.getSections(); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.WrappedCommunication#getRoot() */ @Override public Communication getRoot() { return this.cpy.getRoot(); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.sentenced.MappedSentenceCommunication#getUuidToSentenceMap() */ @Override public Map getUuidToSentenceMap() { return this.cpy.getUuidToSentenceMap(); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.sectioned.MappedSectionCommunication#getUuidToSectionMap() */ @Override public Map getUuidToSectionMap() { return this.cpy.getUuidToSectionMap(); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.tokenized.TokenizedCommunication#getTokenizations() */ @Override public List getTokenizations() { return new ArrayList<>(this.tokenizationIdToTokenizationMap.values()); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.tokenized.MappedTokenizedCommunication#getUuidToTokenizationMap() */ @Override public Map getUuidToTokenizationMap() { return new LinkedHashMap<>(this.tokenizationIdToTokenizationMap); } /* (non-Javadoc) * @see edu.jhu.hlt.concrete.miscommunication.tokenized.MappedTokenizedCommunication#getUuidToTokenIdxToTokenMap() */ @Override public Map> getUuidToTokenIdxToTokenMap() { return new LinkedHashMap<>(this.tokenizationIdToTokenIdxToTokenMap); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy