All downloads are free. Search and download functionality uses the official Maven repository.

com.aliasi.tokenizer.TokenFeatureExtractor Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html. No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.tokenizer;

import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Compilable;
import com.aliasi.util.Counter;
import com.aliasi.util.FeatureExtractor;
import com.aliasi.util.ObjectToCounterMap;
import com.aliasi.util.Strings;

import java.util.Map;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;

/**
 * A TokenFeatureExtractor produces feature vectors from
 * character sequences representing token counts.
 *
 * 

Serialization

* *

The token feature extractors implement the {@link Serializable} * interface. A token feature extractor will actually be serializable * if the underlying tokenizer factory is serializable, either by * implementing the {@link Serializable} interface or the {@link * Compilable} interface. If it is not, attempting to serialize the * feature extractor will throw an exception. * * @author Bob Carpenter * @version 3.8 * @since LingPipe3.1 */ public class TokenFeatureExtractor implements FeatureExtractor, Serializable { static final long serialVersionUID = -1946484959983081450L; private final TokenizerFactory mTokenizerFactory; /** * Construct a token-based feature extractor from the * specified tokenizer factory. * * @param factory Tokenizer factory to use for tokenization. */ public TokenFeatureExtractor(TokenizerFactory factory) { mTokenizerFactory = factory; } /** * Return the feature vector for the specified character sequence. * The keys are the tokens extracted and their values is the count * of the token in the input character sequence. * * @param in Character sequence from which to extract features. * @return Mapping from tokens in the input sequence to their * counts. */ public Map features(CharSequence in) { ObjectToCounterMap map = new ObjectToCounterMap(); char[] cs = Strings.toCharArray(in); Tokenizer tokenizer = mTokenizerFactory.tokenizer(cs,0,cs.length); for (String token : tokenizer) map.increment(token); return map; } /** * Returns a description of this token feature extractor including * its contained tokenizer factory. This method calls the {@code * toString()} method of the contained tokenizer factory. * * @return A description of this token feature * extractor and its contained tokenizer factory. 
*/ @Override public String toString() { return "com.aliasi.tokenizer.TokenFeatureExtractor(" + mTokenizerFactory + ")"; } private Object writeReplace() { return new Externalizer(this); } static class Externalizer extends AbstractExternalizable { static final long serialVersionUID = 4716086241839692672L; private final TokenFeatureExtractor mExtractor; public Externalizer() { this(null); } public Externalizer(TokenFeatureExtractor extractor) { mExtractor = extractor; } @Override public void writeExternal(ObjectOutput out) throws IOException { AbstractExternalizable.compileOrSerialize(mExtractor.mTokenizerFactory, out); } @Override public Object read(ObjectInput in) throws ClassNotFoundException, IOException { TokenizerFactory factory = (TokenizerFactory) in.readObject(); return new TokenFeatureExtractor(factory); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy