com.aliasi.features.InteractionFeatureExtractor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aliasi-lingpipe Show documentation
Show all versions of aliasi-lingpipe Show documentation
This is the original LingPipe:
http://alias-i.com/lingpipe/web/download.html
No changes have been made to the source code.
/*
* LingPipe v. 4.1.0
* Copyright (C) 2003-2011 Alias-i
*
* This program is licensed under the Alias-i Royalty Free License
* Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i
* Royalty Free License Version 1 for more details.
*
* You should have received a copy of the Alias-i Royalty Free License
* Version 1 along with this program; if not, visit
* http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
* Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
* +1 (718) 290-9170.
*/
package com.aliasi.features;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.FeatureExtractor;
import com.aliasi.util.ObjectToDoubleMap;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.Serializable;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
/**
 * An {@code InteractionFeatureExtractor} produces interaction
 * features between two feature extractors.  The value of an
 * interaction feature is the product of the values of the individual
 * features.  The feature itself will be prefixed with a specific
 * string and the features for the interaction are separated by a
 * specific string.
 *
 * <p>If the two feature extractors are the same object (they are
 * compared by identity), then only one feature extraction is
 * performed on an input and redundant features are removed.
 *
 * <h3>Two Extractor Example</h3>
 *
 * <p>If we have feature maps {@code {"a"->1.5, "b"->2.0, "c"->3.0}}
 * and {@code {"d"->1.0, "e"->0.25}}, with prefix {@code I:}
 * and separator {@code *}, then the interaction features are
 * {@code {"I:a*d"->1.5, "I:a*e"->0.375, "I:b*d"->2.0, "I:b*e"->0.5, "I:c*d"->3.0, "I:c*e"->0.75}}.
 *
 * <h3>One Extractor Example</h3>
 *
 * <p>If only one extractor is involved, symmetry is applied and the
 * features are not generated twice.  For instance, if we have
 * the single feature map {@code {"x"->-1.5, "y"->2.0, "z"->1.0}},
 * the interaction feature map is
 * {@code {"I:x*x"->2.25, "I:x*y"->-3.0, "I:x*z"->-1.5, "I:y*y"->4.0, "I:y*z"->2.0, "I:z*z"->1.0}}.
 *
 * <p>In order to preserve uniqueness of the feature output in the
 * one-extractor case, we need to sort the features.  So we make sure
 * that the features are in sorted order in the combination (as
 * determined by the natural sort order defined for {@code String} by
 * its {@code compareTo(String)} method).  Specifically, we get
 * {@code "I:x*z"}, but not {@code "I:z*x"} in the case above.
 * This is not an issue for the two-extractor case, as you get all
 * pairs, with the first extractor's feature always coming first
 * in the resulting feature.
 *
 * <h3>Serialization</h3>
 *
 * <p>An interaction feature extractor may be serialized if its
 * component extractors are serializable.
 *
 * <h3>Thread Safety</h3>
 *
 * <p>An interaction feature extractor is thread safe if the component
 * feature extractor(s) are thread safe.
 *
 * @author Bob Carpenter
 * @version 3.9.2
 * @since LingPipe3.9.2
 * @param <E> Type of objects whose features are extracted.
 */
public class InteractionFeatureExtractor<E>
    implements FeatureExtractor<E>,
               Serializable {

    static final long serialVersionUID = -8221138094563655817L;

    /** String prepended to every generated interaction feature name. */
    private final String mPrefix;

    /** String placed between the two component feature names. */
    private final String mSeparator;

    /** Extractor supplying the left-hand feature of each interaction. */
    private final FeatureExtractor<E> mExtractor1;

    /** Extractor supplying the right-hand feature of each interaction. */
    private final FeatureExtractor<E> mExtractor2;

    /**
     * Construct a feature extractor for interactions between
     * the features extracted by the specified extractors, creating
     * new features with the specified prefix and separator.
     *
     * @param prefix Prefix to prepend to interaction features.
     * @param separator Separator between features in interaction features.
     * @param extractor1 First feature extractor.
     * @param extractor2 Second feature extractor.
     */
    public InteractionFeatureExtractor(String prefix,
                                       String separator,
                                       FeatureExtractor<E> extractor1,
                                       FeatureExtractor<E> extractor2) {
        mPrefix = prefix;
        mSeparator = separator;
        mExtractor1 = extractor1;
        mExtractor2 = extractor2;
    }

    /**
     * Construct a feature extractor for interactions between the
     * features extracted by the specified extractor, creating new
     * features with the specified prefix and separator.
     *
     * @param prefix Prefix to prepend to interaction features.
     * @param separator Separator between features in interaction features.
     * @param extractor Feature extractor.
     */
    public InteractionFeatureExtractor(String prefix,
                                       String separator,
                                       FeatureExtractor<E> extractor) {
        this(prefix,separator,extractor,extractor);
    }

    /**
     * Return the interaction feature map for the specified input.
     *
     * @param in Input whose features are extracted.
     * @return The interaction feature map for the input.
     */
    @Override
    public Map<String,? extends Number> features(E in) {
        // Identity comparison: the symmetric (one-extractor) path is taken
        // only when both fields refer to the very same extractor instance.
        return mExtractor1 == mExtractor2
            ? features1(in)
            : features2(in);
    }

    /**
     * Return the interaction features for the one-extractor case.
     * The extractor is run once, its feature names are sorted in
     * natural {@code String} order, and one feature is produced for
     * each pair (including a feature paired with itself) with the
     * lesser name first.
     *
     * @param in Input whose features are extracted.
     * @return The symmetric interaction feature map for the input.
     */
    Map<String,? extends Number> features1(E in) {
        Map<String,? extends Number> featureMap
            = mExtractor1.features(in);
        String[] features
            = featureMap.keySet().toArray(Strings.EMPTY_STRING_ARRAY);
        // Sorting fixes which of "x*z" / "z*x" is emitted for each pair.
        Arrays.sort(features);
        double[] values = new double[features.length];
        for (int i = 0; i < values.length; ++i) {
            values[i] = featureMap.get(features[i]).doubleValue();
        }
        ObjectToDoubleMap<String> featureMapResult
            = new ObjectToDoubleMap<String>();
        for (int i = 0; i < features.length; ++i) {
            String initial = mPrefix + features[i] + mSeparator;
            // Start at j == i so each unordered pair appears exactly once.
            for (int j = i; j < features.length; ++j) {
                String feature = initial + features[j];
                double value = values[i] * values[j];
                featureMapResult.set(feature,value);
            }
        }
        return featureMapResult;
    }

    /**
     * Return the interaction features for the two-extractor case.
     * Both extractors are run on the input and one feature is
     * produced for every pairing of a feature from the first
     * extractor with a feature from the second, with the first
     * extractor's feature name coming first.
     *
     * @param in Input whose features are extracted.
     * @return The cross-product interaction feature map for the input.
     */
    Map<String,? extends Number> features2(E in) {
        Map<String,? extends Number> features1
            = mExtractor1.features(in);
        Map<String,? extends Number> features2
            = mExtractor2.features(in);
        ObjectToDoubleMap<String> features
            = new ObjectToDoubleMap<String>();
        for (Map.Entry<String,? extends Number> entry1 : features1.entrySet()) {
            String initial = mPrefix + entry1.getKey() + mSeparator;
            double val1 = entry1.getValue().doubleValue();
            for (Map.Entry<String,? extends Number> entry2 : features2.entrySet()) {
                String feature = initial + entry2.getKey();
                double value = val1 * entry2.getValue().doubleValue();
                features.set(feature,value);
            }
        }
        return features;
    }

    /**
     * Serialization hook that substitutes a {@link Serializer} proxy
     * for this extractor when it is written to an object stream.
     *
     * @return Proxy object to serialize in place of this extractor.
     */
    Object writeReplace() {
        return new Serializer<E>(this);
    }

    /**
     * Serialization proxy for an interaction feature extractor.  It
     * writes the prefix, the separator, a flag saying whether both
     * component extractors are the same object, and then one or two
     * component extractors; reading reverses the process and builds a
     * new extractor through the public constructors.
     *
     * @param <F> Type of objects whose features are extracted.
     */
    static class Serializer<F> extends AbstractExternalizable {

        static final long serialVersionUID = -2678228697747811841L;

        /** Extractor being written; {@code null} on the reading side. */
        final InteractionFeatureExtractor<F> mExtractor;

        /**
         * Construct an empty proxy with no extractor to write.
         */
        public Serializer() {
            this(null);
        }

        /**
         * Construct a proxy that writes the specified extractor.
         *
         * @param extractor Extractor to serialize.
         */
        public Serializer(InteractionFeatureExtractor<F> extractor) {
            mExtractor = extractor;
        }

        /**
         * Write the wrapped extractor's state to the specified output.
         *
         * @param out Object output to which the state is written.
         * @throws IOException If there is an underlying I/O error
         * while writing.
         */
        @Override
        public void writeExternal(ObjectOutput out)
            throws IOException {

            out.writeUTF(mExtractor.mPrefix);
            out.writeUTF(mExtractor.mSeparator);
            // Record identity so the one-extractor behavior survives
            // a round trip and the shared extractor is written once.
            boolean same = mExtractor.mExtractor1 == mExtractor.mExtractor2;
            out.writeBoolean(same);
            out.writeObject(mExtractor.mExtractor1);
            if (!same) {
                out.writeObject(mExtractor.mExtractor2);
            }
        }

        /**
         * Read an interaction feature extractor from the specified
         * input, in the format produced by {@link #writeExternal}.
         *
         * @param in Object input from which the state is read.
         * @return The reconstructed interaction feature extractor.
         * @throws IOException If there is an underlying I/O error
         * while reading.
         * @throws ClassNotFoundException If the class of a serialized
         * component extractor cannot be found.
         */
        @Override
        public Object read(ObjectInput in)
            throws IOException, ClassNotFoundException {

            String prefix = in.readUTF();
            String separator = in.readUTF();
            boolean same = in.readBoolean();
            // The element type is erased in the stream, so the cast
            // cannot be checked at runtime.
            @SuppressWarnings("unchecked")
            FeatureExtractor<F> featureExtractor
                = (FeatureExtractor<F>)
                in.readObject();
            if (same) {
                return new InteractionFeatureExtractor<F>(prefix,separator,featureExtractor);
            }
            @SuppressWarnings("unchecked")
            FeatureExtractor<F> featureExtractor2
                = (FeatureExtractor<F>)
                in.readObject();
            return new InteractionFeatureExtractor<F>(prefix,separator,featureExtractor,featureExtractor2);
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy