All downloads are free. Search and download functionality uses the official Maven repository.

com.simiacryptus.text.LanguageModel Maven / Gradle / Ivy

/*
 * Copyright (c) 2019 by Andrew Charneski.
 *
 * The author licenses this file to you under the
 * Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance
 * with the License.  You may obtain a copy
 * of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.simiacryptus.text;

import com.simiacryptus.util.Util;
import org.apache.commons.compress.utils.IOUtils;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Comparator;

/**
 * Language models, each backed by a serialized {@link CharTrie} stored as a classpath resource.
 *
 * <p>The trie of a constant is loaded the first time {@link #getTrie()} is called on it and is
 * cached afterwards.
 */
public enum LanguageModel {
  English("English.trie"), French("French.trie"), German("German.trie");

  /** Name of the classpath resource that holds this model's serialized trie. */
  private final String resource;
  /** Cached trie; {@code null} until the first successful load. */
  private volatile CharTrie trie;

  LanguageModel(String resource) {
    this.resource = resource;
  }

  /**
   * Returns this model's trie, loading it from the classpath on first use.
   *
   * <p>The load runs inside a block synchronized on this constant and re-checks the cached field
   * there, so the trie is deserialized at most once per constant. If loading fails the field stays
   * {@code null} and the next call tries again.
   *
   * @return the cached trie for this model
   */
  public CharTrie getTrie() {
    // Work on a local copy so the common (already loaded) path reads the volatile field once.
    CharTrie loaded = trie;
    if (null == loaded) {
      synchronized (this) {
        loaded = trie;
        if (null == loaded) {
          loaded = new ConvolutionalTrieSerializer().deserialize(CompressionUtil.decodeLZ(readResource()));
          trie = loaded;
        }
      }
    }
    return loaded;
  }

  /**
   * Reads the raw bytes of this model's resource and closes the stream afterwards.
   *
   * <p>A missing resource is reported as an {@link IOException} naming the resource; like any
   * other I/O failure it is rethrown through {@code Util.throwException}.
   *
   * @return the complete content of the resource
   */
  private byte[] readResource() {
    try (InputStream stream = getClass().getClassLoader().getResourceAsStream(resource)) {
      // getResourceAsStream signals "not found" with null rather than an exception.
      if (null == stream) {
        throw new IOException("Language model resource not found on classpath: " + resource);
      }
      return IOUtils.toByteArray(stream);
    } catch (IOException e) {
      throw Util.throwException(e);
    }
  }

  /**
   * Selects the model whose codec yields the smallest {@code bitLength} when encoding the text
   * with {@code encodePPM(text, 2)}.
   *
   * <p>Every constant is evaluated, so this call loads the trie of each model that has not been
   * loaded yet.
   *
   * @param text the text to classify
   * @return the model with the smallest encoded bit length for {@code text}
   */
  @Nonnull
  public static LanguageModel match(@Nonnull String text) {
    return Arrays.stream(LanguageModel.values())
        .min(Comparator
            .comparing(model -> model.getTrie().getCodec().encodePPM(text, 2).bitLength))
        // Unreachable while the enum declares at least one constant.
        .orElseThrow(() -> new IllegalStateException("No language models are defined"));
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy