All Downloads are FREE. Search and download functionalities are using the official Maven repository.

opennlp.tools.parser.ParserModel Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.parser;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Map;
import java.util.Objects;

import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.BaseModel;
import opennlp.tools.util.model.ChunkerModelSerializer;
import opennlp.tools.util.model.POSModelSerializer;

/**
 * This is an abstract base class for {@link ParserModel} implementations.
 */
// TODO: Model should validate the artifact map
public class ParserModel extends BaseModel {

  private static class HeadRulesSerializer implements
      ArtifactSerializer {

    public opennlp.tools.parser.lang.en.HeadRules create(InputStream in)
        throws IOException, InvalidFormatException {
      return new opennlp.tools.parser.lang.en.HeadRules(new BufferedReader(
          new InputStreamReader(in, StandardCharsets.UTF_8)));
    }

    public void serialize(opennlp.tools.parser.lang.en.HeadRules artifact,
        OutputStream out) throws IOException {
      artifact.serialize(new OutputStreamWriter(out, StandardCharsets.UTF_8));
    }
  }

  private static final String COMPONENT_NAME = "Parser";

  private static final String BUILD_MODEL_ENTRY_NAME = "build.model";

  private static final String CHECK_MODEL_ENTRY_NAME = "check.model";

  private static final String ATTACH_MODEL_ENTRY_NAME = "attach.model";

  private static final String PARSER_TAGGER_MODEL_ENTRY_NAME = "parsertager.postagger";

  private static final String CHUNKER_TAGGER_MODEL_ENTRY_NAME = "parserchunker.chunker";

  private static final String HEAD_RULES_MODEL_ENTRY_NAME = "head-rules.headrules";

  private static final String PARSER_TYPE = "parser-type";

  public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel,
      MaxentModel attachModel, POSModel parserTagger,
      ChunkerModel chunkerTagger, opennlp.tools.parser.HeadRules headRules,
      ParserType modelType, Map manifestInfoEntries) {

    super(COMPONENT_NAME, languageCode, manifestInfoEntries);

    setManifestProperty(PARSER_TYPE, modelType.name());

    artifactMap.put(BUILD_MODEL_ENTRY_NAME, buildModel);

    artifactMap.put(CHECK_MODEL_ENTRY_NAME, checkModel);

    if (ParserType.CHUNKING.equals(modelType)) {
      if (attachModel != null)
          throw new IllegalArgumentException("attachModel must be null for chunking parser!");
    }
    else if (ParserType.TREEINSERT.equals(modelType)) {
      Objects.requireNonNull(attachModel, "attachModel must not be null");
      artifactMap.put(ATTACH_MODEL_ENTRY_NAME, attachModel);
    }
    else {
      throw new IllegalStateException("Unknown ParserType '" + modelType + "'!");
    }

    artifactMap.put(PARSER_TAGGER_MODEL_ENTRY_NAME, parserTagger);

    artifactMap.put(CHUNKER_TAGGER_MODEL_ENTRY_NAME, chunkerTagger);

    artifactMap.put(HEAD_RULES_MODEL_ENTRY_NAME, headRules);
    checkArtifactMap();
  }

  public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel,
      MaxentModel attachModel, POSModel parserTagger,
      ChunkerModel chunkerTagger, opennlp.tools.parser.HeadRules headRules,
      ParserType modelType) {
    this (languageCode, buildModel, checkModel, attachModel, parserTagger,
        chunkerTagger, headRules, modelType, null);
  }

  public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel,
      POSModel parserTagger, ChunkerModel chunkerTagger,
      opennlp.tools.parser.HeadRules headRules, ParserType type,
      Map manifestInfoEntries) {
    this (languageCode, buildModel, checkModel, null, parserTagger,
        chunkerTagger, headRules, type, manifestInfoEntries);
  }

  public ParserModel(InputStream in) throws IOException {
    super(COMPONENT_NAME, in);
  }

  public ParserModel(File modelFile) throws IOException {
    super(COMPONENT_NAME, modelFile);
  }

  public ParserModel(Path modelPath) throws IOException {
    this(modelPath.toFile());
  }

  public ParserModel(URL modelURL) throws IOException {
    super(COMPONENT_NAME, modelURL);
  }

  @Override
  protected void createArtifactSerializers(
      Map serializers) {

    super.createArtifactSerializers(serializers);

    // In 1.6.x the headrules artifact is serialized with the new API
    // which uses the Serializeable interface
    // This change is not backward compatible with the 1.5.x models.
    // In order to laod 1.5.x model the English headrules serializer must be
    // put on the serializer map.

    if (getVersion().getMajor() == 1 && getVersion().getMinor() == 5) {
      serializers.put("headrules", new HeadRulesSerializer());
    }

    serializers.put("postagger", new POSModelSerializer());
    serializers.put("chunker", new ChunkerModelSerializer());
  }

  public ParserType getParserType() {
    return ParserType.parse(getManifestProperty(PARSER_TYPE));
  }

  public MaxentModel getBuildModel() {
    return (MaxentModel) artifactMap.get(BUILD_MODEL_ENTRY_NAME);
  }

  public MaxentModel getCheckModel() {
    return (MaxentModel) artifactMap.get(CHECK_MODEL_ENTRY_NAME);
  }

  public MaxentModel getAttachModel() {
    return (MaxentModel) artifactMap.get(ATTACH_MODEL_ENTRY_NAME);
  }

  public POSModel getParserTaggerModel() {
    return (POSModel) artifactMap.get(PARSER_TAGGER_MODEL_ENTRY_NAME);
  }

  public ChunkerModel getParserChunkerModel() {
    return (ChunkerModel) artifactMap.get(CHUNKER_TAGGER_MODEL_ENTRY_NAME);
  }

  public opennlp.tools.parser.HeadRules getHeadRules() {
    return (opennlp.tools.parser.HeadRules)
        artifactMap.get(HEAD_RULES_MODEL_ENTRY_NAME);
  }

  // TODO: Update model methods should make sure properties are copied correctly ...
  public ParserModel updateBuildModel(MaxentModel buildModel) {
    return new ParserModel(getLanguage(), buildModel, getCheckModel(), getAttachModel(),
        getParserTaggerModel(), getParserChunkerModel(),
        getHeadRules(), getParserType());
  }

  public ParserModel updateCheckModel(MaxentModel checkModel) {
    return new ParserModel(getLanguage(), getBuildModel(), checkModel,
        getAttachModel(), getParserTaggerModel(),
        getParserChunkerModel(), getHeadRules(), getParserType());
  }

  public ParserModel updateTaggerModel(POSModel taggerModel) {
    return new ParserModel(getLanguage(), getBuildModel(), getCheckModel(), getAttachModel(),
        taggerModel, getParserChunkerModel(), getHeadRules(), getParserType());
  }

  public ParserModel updateChunkerModel(ChunkerModel chunkModel) {
    return new ParserModel(getLanguage(), getBuildModel(), getCheckModel(), getAttachModel(),
        getParserTaggerModel(), chunkModel, getHeadRules(), getParserType());
  }

  @Override
  protected void validateArtifactMap() throws InvalidFormatException {
    super.validateArtifactMap();

    if (!(artifactMap.get(BUILD_MODEL_ENTRY_NAME)  instanceof AbstractModel)) {
      throw new InvalidFormatException("Missing the build model!");
    }

    ParserType modelType = getParserType();

    if (modelType != null) {
      if (ParserType.CHUNKING.equals(modelType)) {
        if (artifactMap.get(ATTACH_MODEL_ENTRY_NAME) != null)
            throw new InvalidFormatException("attachModel must be null for chunking parser!");
      }
      else if (ParserType.TREEINSERT.equals(modelType)) {
        if (!(artifactMap.get(ATTACH_MODEL_ENTRY_NAME)  instanceof AbstractModel))
          throw new InvalidFormatException("attachModel must not be null!");
      }
      else {
        throw new InvalidFormatException("Unknown ParserType '" + modelType + "'!");
      }
    }
    else {
      throw new InvalidFormatException("Missing the parser type property!");
    }

    if (!(artifactMap.get(CHECK_MODEL_ENTRY_NAME)  instanceof AbstractModel)) {
      throw new InvalidFormatException("Missing the check model!");
    }

    if (!(artifactMap.get(PARSER_TAGGER_MODEL_ENTRY_NAME)  instanceof POSModel)) {
      throw new InvalidFormatException("Missing the tagger model!");
    }

    if (!(artifactMap.get(CHUNKER_TAGGER_MODEL_ENTRY_NAME)  instanceof ChunkerModel)) {
      throw new InvalidFormatException("Missing the chunker model!");
    }

    if (!(artifactMap.get(HEAD_RULES_MODEL_ENTRY_NAME)  instanceof HeadRules)) {
      throw new InvalidFormatException("Missing the head rules!");
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy