All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.joliciel.talismane.examples.TalismaneAPIExamples Maven / Gradle / Ivy

There is a newer version: 6.1.8
Show newest version
///////////////////////////////////////////////////////////////////////////////
//Copyright (C) 2015 Joliciel Informatique
//
//This file is part of Talismane.
//
//Talismane is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//Talismane is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU Affero General Public License for more details.
//
//You should have received a copy of the GNU Affero General Public License
//along with Talismane.  If not, see .
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.talismane.examples;

import java.util.List;

import com.joliciel.talismane.AnnotatedText;
import com.joliciel.talismane.TalismaneSession;
import com.joliciel.talismane.parser.ParseConfiguration;
import com.joliciel.talismane.parser.ParseTree;
import com.joliciel.talismane.parser.Parser;
import com.joliciel.talismane.parser.Parsers;
import com.joliciel.talismane.posTagger.PosTagSequence;
import com.joliciel.talismane.posTagger.PosTagger;
import com.joliciel.talismane.posTagger.PosTaggers;
import com.joliciel.talismane.rawText.RawText;
import com.joliciel.talismane.rawText.RawTextAnnotator;
import com.joliciel.talismane.rawText.Sentence;
import com.joliciel.talismane.sentenceAnnotators.SentenceAnnotator;
import com.joliciel.talismane.sentenceDetector.SentenceDetector;
import com.joliciel.talismane.tokeniser.TokenSequence;
import com.joliciel.talismane.tokeniser.Tokeniser;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;

/**
 * A class showing how to analyse a sentence using the Talismane API and an
 * existing language pack.
* * Usage (barring the classpath, which must include Talismane jars):
* *
 * java -Dconfig.file=[languagePackConfigFile] com.joliciel.talismane.examples.TalismaneAPITest
 * 
*/ public class TalismaneAPIExamples { public static void main(String[] args) throws Exception { OptionParser parser = new OptionParser(); OptionSpec exampleOption = parser.accepts("example", "which example to run").withRequiredArg().ofType(Integer.class); OptionSpec sessionIdOption = parser.accepts("sessionId", "the current session id - configuration read as talismane.core.[sessionId]") .withRequiredArg().required().ofType(String.class); if (args.length <= 1) { parser.printHelpOn(System.out); return; } OptionSet options = parser.parse(args); int example = 1; if (options.has(exampleOption)) { example = options.valueOf(exampleOption); } String sessionId = options.valueOf(sessionIdOption); if (example == 1) example1(sessionId); else example2(sessionId); } /** * An example tokenising, pos-tagging and parsing a pre-existing sentence. */ public static void example1(String sessionId) throws Exception { String text = "Les amoureux qui se bécotent sur les bancs publics ont des petites gueules bien sympathiques."; // load the Talismane configuration Config conf = ConfigFactory.load(); TalismaneSession session = new TalismaneSession(conf, sessionId); // tokenise the text Tokeniser tokeniser = Tokeniser.getInstance(session); TokenSequence tokenSequence = tokeniser.tokeniseText(text); // pos-tag the token sequence PosTagger posTagger = PosTaggers.getPosTagger(session); PosTagSequence posTagSequence = posTagger.tagSentence(tokenSequence); System.out.println(posTagSequence); // parse the pos-tag sequence Parser parser = Parsers.getParser(session); ParseConfiguration parseConfiguration = parser.parseSentence(posTagSequence); ParseTree parseTree = new ParseTree(parseConfiguration, true); System.out.println(parseTree); } /** * Similar to example1, but begins with filtering and sentence detection. */ public static void example2(String sessionId) throws Exception { String text = "Les gens qui voient de travers pensent que les bancs verts qu'on voit sur les trottoirs " + "sont faits pour les impotents ou les ventripotents. " + "Mais c'est une absurdité, car, à la vérité, ils sont là, c'est notoire, " + "pour accueillir quelque temps les amours débutants."; // load the Talismane configuration Config conf = ConfigFactory.load(); TalismaneSession session = new TalismaneSession(conf, sessionId); RawText rawText = new RawText(text, true, session); // filter the text - in the case where filters are defined // to skip certain parts of the text (e.g. XML) or to fix encoding // issues (e.g. replace " with ") for (RawTextAnnotator filter : session.getTextAnnotators()) { filter.annotate(rawText); } // retrieve the processed text after filters have been applied AnnotatedText processedText = rawText.getProcessedText(); // detect sentences SentenceDetector sentenceDetector = SentenceDetector.getInstance(session); sentenceDetector.detectSentences(processedText); // the detected sentences can be retrieved directly from the raw text // this allows annotations made on the sentences to get reflected in the // raw text List sentences = rawText.getDetectedSentences(); for (Sentence sentence : sentences) { // apply any sentence annotators to prepare the text for analysis // via deterministic rules (e.g. token boundaries or pos-tag // assignment for a given word) for (SentenceAnnotator annotator : session.getSentenceAnnotators()) { annotator.annotate(sentence); } // tokenise the text Tokeniser tokeniser = Tokeniser.getInstance(session); TokenSequence tokenSequence = tokeniser.tokeniseSentence(sentence); // pos-tag the token sequence PosTagger posTagger = PosTaggers.getPosTagger(session); PosTagSequence posTagSequence = posTagger.tagSentence(tokenSequence); System.out.println(posTagSequence); // parse the pos-tag sequence Parser parser = Parsers.getParser(session); ParseConfiguration parseConfiguration = parser.parseSentence(posTagSequence); System.out.println(parseConfiguration); ParseTree parseTree = new ParseTree(parseConfiguration, true); System.out.println(parseTree); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy