All downloads are free. Search and download functionality uses the official Maven repository.

com.aliasi.chunk.ChunkingEvaluation Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.chunk;

import com.aliasi.classify.PrecisionRecallEvaluation;

import com.aliasi.util.Strings;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * A ChunkingEvaluation stores and reports the results of
 * evaluating response chunkings against reference chunkings.  Cases
 * to evaluate are supplied in the form of a reference and response
 * chunking through the method {@link #addCase(Chunking,Chunking)}.
 *
 * 

The sets of true positive, * false positive and false negative chunks are available through the * methods {@link #truePositiveSet()}, {@link #falsePositiveSet()}, * and {@link #falseNegativeSet()}. True positives are chunks that * are in both the reference and response, false positives are chunks * in the response but not the reference, and false negatives are in * the reference, but not the response. There is no notion of true * negative in this task, a fact that is reflected in the results of * the precision-recall evaluation. * *

The main method of reporting is through an instance of {@link * com.aliasi.classify.ScoredPrecisionRecallEvaluation} returned by * the method {@link #precisionRecallEvaluation()}. The return result * provides an object capable of extensive reporting for scored * classification tasks such as chunking. The instances of true and * false positive and negatives are described above; their scores are * derived from response scores. * *

This evaluator works solely on the basis of chunk offset and * exact match. There is no notion of alignment or mapping, as found, * for example, in the MUC * Scoring Software User's Manual, and its descendants such as the * 2005 * ACE Evaluation Plan. In this regard, we follow the model of * CoNLL 2000 * Chunking Task. * *

This evaluation is able to handle overlapping chunks with * results being reported in the same manner. In particular, the * labeled precision and recall components of the approach that later * became known as PARSEVAL can * be generated by using the ChunkingEvaluation class. * * @author Bob Carpenter * @version 3.8 * @since LingPipe2.1 */ public class ChunkingEvaluation { private final Set mCases = new HashSet(); private final Set mTruePositiveSet = new HashSet(); private final Set mFalsePositiveSet = new HashSet(); private final Set mFalseNegativeSet = new HashSet(); String mLastCase = null; /** * Construct a chunking evaluation. */ public ChunkingEvaluation() { /* do nothing */ } /** * Return the set of cases consisting of pairs of reference and * response chunkings. The elements of the set returned are of * type Chunking[], with the first element being the * reference chunk and the second element being the response * chunk. * *

The set returned is an unmodifiable view of the underlying * set of cases and will change as cases are added to this * evaluation. * * @return The set of cases. */ public Set cases() { return Collections.unmodifiableSet(mCases); } /** * Returns a chunking evaluation which consists of the current * chunking evaluation restricted to the specified type. A * new evaluation is constructed and populated with the same * cases as this evaluation, but with the reference and response * chunkings both restricted to only include answers of the * specified type. * * @param chunkType Type of chunk to be evaluated. * @return ChunkingEvaluation Evaluation for this type. */ public ChunkingEvaluation perTypeEvaluation(String chunkType) { ChunkingEvaluation evaluation = new ChunkingEvaluation(); for (Chunking[] testCase : cases()) { Chunking referenceChunking = testCase[0]; Chunking responseChunking = testCase[1]; Chunking referenceChunkingRestricted = restrictTo(referenceChunking,chunkType); Chunking responseChunkingRestricted = restrictTo(responseChunking,chunkType); evaluation.addCase(referenceChunkingRestricted, responseChunkingRestricted); } return evaluation; } static Chunking restrictTo(Chunking chunking, String type) { CharSequence cs = chunking.charSequence(); ChunkingImpl chunkingOut = new ChunkingImpl(cs); for (Chunk chunk : chunking.chunkSet()) if (chunk.type().equals(type)) chunkingOut.add(chunk); return chunkingOut; } static String formatChunks(Chunking chunking) { StringBuilder sb = new StringBuilder(); int pos = 0; for (Chunk chunk : chunking.chunkSet()) { int start = chunk.start(); int padLength = start-pos; for (int j = 0; j < padLength; ++j) sb.append(" "); int end = chunk.end(); int chunkLength = end-start; char marker = chunk.type().length() > 0 ? 
chunk.type().charAt(0) : '!'; if (chunkLength > 0) sb.append(marker); for (int j = 1; j < chunkLength; ++j) sb.append("."); pos = end; } sb.append("\n"); return sb.toString(); } static String formatHeader(int indent, Chunking chunking) { String cs = chunking.charSequence().toString(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < indent; ++i) sb.append(" "); sb.append("CHUNKS= "); for (Chunk chunk : chunking.chunkSet()) { sb.append("(" + chunk.start() + "," + chunk.end() + "):" + chunk.type() + " "); } if (sb.charAt(sb.length()-1) != '\n') sb.append("\n"); for (int i = 0; i < indent; ++i) sb.append(" "); sb.append(cs); sb.append("\n"); int length = cs.length(); printMods(1,length, sb,indent); printMods(10,length, sb,indent); printMods(100,length, sb,indent); if (sb.charAt(sb.length()-1) != '\n') sb.append("\n"); return sb.toString(); } static void printMods(int base, int length, StringBuilder sb, int indent) { if (length <= base) return; for (int i = 0; i < indent; ++i) sb.append(" "); for (int i = 0; i < length; ++i) { if (base == 1 || (i >= base && i % 10 == 0)) sb.append(Integer.toString((i/base)%10)); else sb.append(" "); } sb.append("\n"); } /** * Add an evaluation case consisting of a reference chunk * set and a response chunk set. * * @param referenceChunking Chunking of reference chunks. * @param responseChunking Chunking of response chunks. * @throws IllegalArgumentException If the chunkings are not * over the same character sequence. */ public void addCase(Chunking referenceChunking, Chunking responseChunking) { StringBuilder sb = new StringBuilder(); CharSequence cSeq = referenceChunking.charSequence(); if (!Strings.equalCharSequence(cSeq, responseChunking.charSequence())) { String msg = "Char sequences must be same." 
+ " Reference char seq=" + cSeq + " Response char seq=" + responseChunking.charSequence(); throw new IllegalArgumentException(msg); } sb.append("\n"); sb.append(formatHeader(5,referenceChunking)); // 5 is indent for " REF " and "RESP " sb.append("\n REF "); sb.append(formatChunks(referenceChunking)); sb.append("RESP "); sb.append(formatChunks(responseChunking)); sb.append("\n"); mLastCase = sb.toString(); mCases.add(new Chunking[] { referenceChunking, responseChunking }); // need mutable sets, so wrap Set refSet = unscoredChunkSet(referenceChunking); Set respSet = unscoredChunkSet(responseChunking); for (Chunk respChunk : respSet) { boolean inRef = refSet.remove(respChunk); ChunkAndCharSeq ccs = new ChunkAndCharSeq(respChunk,cSeq); if (inRef) { mTruePositiveSet.add(ccs); } else { mFalsePositiveSet.add(ccs); } } for (Chunk refChunk : refSet) { mFalseNegativeSet.add(new ChunkAndCharSeq(refChunk,cSeq)); } } static Set unscoredChunkSet(Chunking chunking) { Set result = new HashSet(); for (Chunk chunk : chunking.chunkSet()) result.add(ChunkFactory.createChunk(chunk.start(), chunk.end(), chunk.type())); return result; } /** * Returns the set of true positives. True positives are chunks * that were in both a reference and response chunking case. The * set returned contains instances of {@link ChunkAndCharSeq}, * which combine a chunk and a character sequence. * *

The set is unmodifiable, but tracks the changes in this * evaluator. * * @return The set of true positives. */ public Set truePositiveSet() { return Collections.unmodifiableSet(mTruePositiveSet); } /** * Returns the set of false positives. False positives are * response chunks that are not reference chunks. The set returned * contains instances of {@link ChunkAndCharSeq}, which combine a * chunk and a character sequence. * *

The set is unmodifiable, but tracks the changes in this * evaluator. * * @return The set of false positives. */ public Set falsePositiveSet() { return Collections.unmodifiableSet(mFalsePositiveSet); } /** * Returns the set of false negatives. False negatives are * reference chunks which are not response chunks. The set * returned contains instances of {@link ChunkAndCharSeq}, which * combine a chunk and a character sequence. * *

The set is unmodifiable, but tracks the changes in this * evaluator. * * @return The set of false negatives. */ public Set falseNegativeSet() { return Collections.unmodifiableSet(mFalseNegativeSet); } /** * Return the scored precision-recall evaluation for this chunker. * This is a copy of the precision-recall evaluation and changes to * it will not affect the results returned by this class. * * @return The precision-recall evaluation. */ public PrecisionRecallEvaluation precisionRecallEvaluation() { int tp = truePositiveSet().size(); int fn = falseNegativeSet().size(); int fp = falsePositiveSet().size(); return new PrecisionRecallEvaluation(tp,fn,fp,0); } /** * Returns the precision-recall evaluation for this chunking * as a string. * * @return This evaluation as a string. */ @Override public String toString() { return precisionRecallEvaluation().toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy