All downloads are free. Search and download functionality uses the official Maven repository.

com.aliasi.chunk.ChunkerEvaluator Maven / Gradle / Ivy

Go to download

This is the original LingPipe distribution: http://alias-i.com/lingpipe/web/download.html. No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.chunk;

import com.aliasi.classify.ScoredPrecisionRecallEvaluation;

import com.aliasi.corpus.ObjectHandler;

import com.aliasi.util.ObjectToCounterMap;
import com.aliasi.util.ScoredObject;
import com.aliasi.util.Strings;

import java.util.Formatter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;

/**
 * The ChunkerEvaulator class provides an evaluation
 * framework for chunkers.  An instance of this class is constructed
 * based on the chunker to be evaluated.  This class implements the
 * {@code ObjectHandler} interface in order to receive reference
 * chunkings.  Reference chunkings may be added directly using the
 * {@link #handle(Chunking)} or by passing this handler to an
 * appropriate parser.  Either way, the sequence is extracted from the
 * reference chunking, the contained chunker is used to generate
 * a response chunking, and then the reference and response chunkings
 * are added to a contained {@link ChunkingEvaluation} which maintains
 * a running score.  The method {@link #evaluation()} returns the
 * contained chunking evaluation, which may be inspected for partial
 * results at any time.
 *
 * 

Thread Safety

* * Evaluators are not thread safe. In order to ensure thread safety, * read/write synchronization is required on the methods. Read * methods return scores, write methods alter the evaluator's state, * either by changing the underlying chunker or adding examples. * * @author Bob Carpenter * @version 4.0.0 * @since LingPipe2.1 */ public class ChunkerEvaluator implements ObjectHandler { private Chunker mChunker; private boolean mVerbose = false; // 1st-best private final ChunkingEvaluation mChunkingEvaluation; // n-best private final ObjectToCounterMap mCorrectRanks = new ObjectToCounterMap(); // conf eval private final ScoredPrecisionRecallEvaluation mConfEval = new ScoredPrecisionRecallEvaluation(); // n-best int mMaxNBest = 64; int mMaxNBestPrint = 8; String mLastNBestCase = null; // conf eval int mConfMaxChunks = 128; String mLastConfidenceCase = null; /** * Construct an evaluator for the specified chunker. * * @param chunker Chunker to evaluate. */ public ChunkerEvaluator(Chunker chunker) { mChunker = chunker; mChunkingEvaluation = new ChunkingEvaluation(); } /** * Returns the underlying chunker for this evaluator. * * @return The underlying chunker. */ public Chunker chunker() { return mChunker; } /** * Set the underlying chunker to the specified value. * * @param chunker New underlying chunker for this evaluator. */ public void setChunker(Chunker chunker) { mChunker = chunker; } /** * Sets the verbosity level of this evaluator to the specified * value. If the argument is true, calls to {@link * #handle(Chunking)} will print (to {@link System#out}) a report * for each chunking evaluation (first-best, n-best and * confidence). * *

The reports that are written are also available as strings * programmatically through the methods {@link #lastNBestCaseReport()}, and * {@link #lastConfidenceCaseReport()}, * * @param isVerbose true for standard output per * case. */ public void setVerbose(boolean isVerbose) { mVerbose = isVerbose; } /** * Returns a string-based representation of the last evaluation * case and the first-best result. * * @return The first-best report for the last case handled. */ public String lastFirstBestCaseReport() { return mChunkingEvaluation.mLastCase; } /** * Sets the maximum number of chunks extracted by a * confidence-based chunker for evaluation. * * @param n Number of chunks to extract with confidence. */ public void setMaxConfidenceChunks(int n) { mConfMaxChunks = n; } /** * Returns a string-based representation of the last evaluation * case's confidence evaluation. If there has not been an * evaluation case or the chunker being evaluated is not a * confidence-based chunker, this result will be * null. * * @return A string representation of the last case's confidence * evaluation. */ public String lastConfidenceCaseReport() { return mLastConfidenceCase; } /** * Sets the maximum number of chunkings extracted by an n-best * chunker for evaluation. * * @param n Number of chunkings to evaluate in n-best chunking. */ public void setMaxNBest(int n) { mMaxNBest = n; } /** * Sets the maximum number of chunkings that will be reported in a * case report. That is, chunkings reported through a call to the * the {@link #lastNBestCaseReport()} method. * * @param n Number of n-best results to print in a case report. */ public void setMaxNBestReport(int n) { mMaxNBestPrint = n; } /** * Returns a string-based representation of the last n-best * evaluation case. * * @return String representing the last n-best case evaluation. */ public String lastNBestCaseReport() { return mLastNBestCase; } /** * Handle the specified reference chunking encoded in the standard * BIO tag chunking format. 
If the whitespaces are null, * a single space character is used to separate tokens. * *

See {@link #handle(Chunking)} for * more information. * * @param tokens Array of tokens. * @param whitespaces Array of whitespaces. * @param tags Array of tags. */ void handle(String[] tokens, String[] whitespaces, String[] tags) { ChunkTagHandlerAdapter2 adapter = new ChunkTagHandlerAdapter2(this); adapter.handle(tokens,whitespaces,tags); } /** * Handle the specified reference chunking. This involves * running the chunker being evaluated over the reference * chunking's sequence to create a response chunking, which * is then added with the reference chunking as a case to * the chunking evaluation. * *

If the contained chunker returns null for * a given input, this method will fill in a chunking over * the appropriate sequence with no chunks for evaluation. * * @param referenceChunking The reference chunking case. */ public void handle(Chunking referenceChunking) { CharSequence cSeq = referenceChunking.charSequence(); // first-best Chunking firstBestChunking = mChunker.chunk(cSeq); if (firstBestChunking == null) firstBestChunking = new ChunkingImpl(cSeq); mChunkingEvaluation.addCase(referenceChunking,firstBestChunking); if (mChunker instanceof NBestChunker) { NBestChunker nBestChunker = (NBestChunker) mChunker; char[] cs = Strings.toCharArray(cSeq); StringBuilder sb = new StringBuilder(); sb.append(ChunkingEvaluation.formatHeader(13,referenceChunking)); sb.append(" REF " + ChunkingEvaluation.formatChunks(referenceChunking)); double score = Double.NEGATIVE_INFINITY; int foundRank = -1; int i = 0; Iterator> nBestIt = nBestChunker.nBest(cs,0,cs.length,mMaxNBest); Formatter formatter = new Formatter(sb,Locale.US); for (i = 0; i < mMaxNBest && nBestIt.hasNext(); ++i) { ScoredObject so = nBestIt.next(); score = so.score(); Chunking responseChunking = so.getObject(); if (i < mMaxNBestPrint) { formatter.format("%9d",i); sb.append(" "); formatter.format("%10.3f",score); sb.append(" "); sb.append(ChunkingEvaluation.formatChunks(responseChunking)); } if (responseChunking.equals(referenceChunking)) { sb.append(" -----------\n"); foundRank = i; } } if (foundRank < 0) sb.append("Correct Rank >=" + mMaxNBest + "\n\n"); else sb.append("Correct Rank=" + foundRank + "\n\n"); mCorrectRanks.increment(Integer.valueOf(foundRank)); mLastNBestCase = sb.toString(); } if (mChunker instanceof ConfidenceChunker) { ConfidenceChunker confChunker = (ConfidenceChunker) mChunker; char[] cs = Strings.toCharArray(cSeq); StringBuilder sb = new StringBuilder(); Set refChunks = new HashSet(); for (Chunk nextChunk : referenceChunking.chunkSet()) { Chunk zeroChunk = toUnscoredChunk(nextChunk); 
refChunks.add(zeroChunk); } sb.append(ChunkingEvaluation.formatHeader(5,referenceChunking)); Iterator nBestChunkIt = confChunker.nBestChunks(cs,0,cs.length,mConfMaxChunks); int count = 0; int missCount = refChunks.size(); while (nBestChunkIt.hasNext()) { Chunk nextChunk = nBestChunkIt.next(); double score = nextChunk.score(); Chunk zeroedChunk = toUnscoredChunk(nextChunk); boolean correct = refChunks.contains(zeroedChunk); if (correct) --missCount; sb.append((correct ? "TRUE " : "false") + " (" + nextChunk.start() + ", " + nextChunk.end() + ")" + ": " + nextChunk.type() + " " + nextChunk.score() + "\n"); mConfEval.addCase(correct,score); } mConfEval.addMisses(missCount); mLastConfidenceCase = sb.toString(); } report(); } void report() { if (!mVerbose) return; System.out.println(mChunkingEvaluation.mLastCase); if (mChunker instanceof NBestChunker) System.out.println(mLastNBestCase); if (mChunker instanceof ConfidenceChunker) System.out.println(mLastConfidenceCase); } /** * Returns the scored precision-recall evaluation derived from a * confidence-based chunker. If the chunker being evaluated is * not a confidence-based chunker, then this evaluation will be * empty. * *

This is the actual evaluation used by this class, so * changing it will affect this class's results. * * @return The scored precision/recall evaluation. */ public ScoredPrecisionRecallEvaluation confidenceEvaluation() { return mConfEval; } /** * Return the first-best chunking evaluation. * *

This is the actual evaluation used by this class, so * changing it will affect this class's results. * * @return The chunking evaluation. */ public ChunkingEvaluation evaluation() { return mChunkingEvaluation; } /** * Returns the n-best evaluation in the form of a mapping from * ranks to the number of times the reference chunking was that * rank in the evaluation. The ranks are instances of * Integer, with -1 being the rank * assigned to cases in which the reference chunking was not * among the n-best results. * *

This is the actual counter used by this class, so * changing it will affect this class's results. * *

If the chunker being evaluated is not an n-best chunker, * then this evaluation will be empty. * * @return The n-best evaluation. */ public ObjectToCounterMap nBestEvaluation() { return mCorrectRanks; } /** * Returns a string-based representation of this evaluation. * It will include the first-best evaluation. An n-best evaluation * and/or a confidence evaluation are included if defined. * * @return A string-based representation of this evaluator. */ @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("FIRST-BEST EVAL\n"); sb.append(evaluation().toString()); if (mChunker instanceof NBestChunker) { sb.append("\n\nN-BEST EVAL (rank=count)\n"); sb.append(nBestEvaluation().toString()); } if (mChunker instanceof ConfidenceChunker) { sb.append("\n\nCONFIDENCE EVALUATION"); sb.append(confidenceEvaluation().toString()); } return sb.toString(); } static Chunk toUnscoredChunk(Chunk c) { return ChunkFactory.createChunk(c.start(), c.end(), c.type()); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy