All Downloads are FREE. Search and download functionalities are using the official Maven repository.

simplenlg.format.english.MarkupFormatter Maven / Gradle / Ivy

There is a newer version: 2.2.0
Show newest version
/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is "Simplenlg".
 *
 * The Initial Developer of the Original Code is Ehud Reiter, Albert Gatt and Dave Westwater.
 * Portions created by Ehud Reiter, Albert Gatt and Dave Westwater are Copyright (C) 2010-11 The University of Aberdeen. All Rights Reserved.
 *
 * Contributor(s): Ehud Reiter, Albert Gatt, Dave Wewstwater, Roman Kutlak, Margaret Mitchell.
 */
package simplenlg.format.english;

import java.util.ArrayList;
import java.util.List;

import nlg.wrapper.Constants;
import simplenlg.features.DiscourseFunction;
import simplenlg.features.InternalFeature;
import simplenlg.framework.CoordinatedPhraseElement;
import simplenlg.framework.DocumentCategory;
import simplenlg.framework.DocumentElement;
import simplenlg.framework.ElementCategory;
import simplenlg.framework.ListElement;
import simplenlg.framework.NLGElement;
import simplenlg.framework.NLGModule;
import simplenlg.framework.StringElement;

import static simplenlg.format.english.MarkupMorphologyProcessor.propagateMarkup;

/**
 * 

* This processing module adds some simple plain text formatting to the * SimpleNLG output. This includes the following: *

    *
  • Adding the document title to the beginning of the text.
  • *
  • Adding section titles in the relevant places.
  • *
  • Adding appropriate new line breaks for ease-of-reading.
  • *
  • Adding list items with ' * '.
  • *
  • Adding numbers for enumerated lists (e.g., "1.1 - ", "1.2 - ", etc.)
  • *
*

* * @author D. Westwater, University of Aberdeen. * @version 4.0 * */ public class MarkupFormatter extends NLGModule { static private NumberedPrefix numberedPrefix = new NumberedPrefix(); @Override public void initialise() { // Do nothing } @Override public NLGElement realise(NLGElement element) { debug_println("*** (1)" + element); NLGElement realisedComponent = null; StringBuffer realisation = new StringBuffer(); if (element != null) { ElementCategory category = element.getCategory(); List components = element.getChildren(); debug_println("*** (1)" + category); //NB: The order of the if-statements below is important! // check if this is a canned text first if (element instanceof StringElement) { debug_println("*** (2)" + element ); debug_println("*** (2)" + components ); debug_println("*** (2)" + element.getAllFeatures() ); String elementRealisation = element.getRealisation(); String s; elementRealisation = addMarkupToRealisation(element, elementRealisation); realisation.append(elementRealisation); } else if (category instanceof DocumentCategory) { debug_println("*** (3)" + components ); // && element instanceof DocumentElement String title = element instanceof DocumentElement ? ((DocumentElement) element) .getTitle() : null; // String title = ((DocumentElement) element).getTitle(); switch ((DocumentCategory) category) { case DOCUMENT: appendTitle(realisation, title, 2); realiseSubComponents(realisation, components); break; case SECTION: appendTitle(realisation, title, 1); realiseSubComponents(realisation, components); break; case LIST: realiseSubComponents(realisation, components); break; case ENUMERATED_LIST: numberedPrefix.upALevel(); if (title != null) { realisation.append(title).append('\n'); } if (null != components && 0 < components.size()) { realisedComponent = realise(components.get(0)); if (realisedComponent != null) { realisation.append(realisedComponent.getRealisation()); } for (int i = 1; i < components.size(); i++) { if (realisedComponent != null && !realisedComponent.getRealisation().endsWith("\n")) { realisation.append(' '); } if(components.get(i).getParent().getCategory() == DocumentCategory.ENUMERATED_LIST) { numberedPrefix.increment(); } realisedComponent = realise(components.get(i)); if (realisedComponent != null) { realisation.append(realisedComponent.getRealisation()); } } } numberedPrefix.downALevel(); break; case PARAGRAPH: if (null != components && 0 < components.size()) { realisedComponent = realise(components.get(0)); if (realisedComponent != null) { realisation.append(realisedComponent.getRealisation()); } for (int i = 1; i < components.size(); i++) { if (realisedComponent != null) { realisation.append(' '); } realisedComponent = realise(components.get(i)); if (realisedComponent != null) { realisation.append(realisedComponent.getRealisation()); } } } realisation.append("\n\n"); break; case SENTENCE: //realisation.append(element.getRealisation()); realiseSubComponentsWithMarkup(realisation, element, components); stripLeadingCommas(realisation); capitaliseFirstLetter(realisation); terminateSentence(realisation, element.getFeatureAsBoolean(InternalFeature.INTERROGATIVE).booleanValue()); break; case LIST_ITEM: if(element.getParent() != null) { if(element.getParent().getCategory() == DocumentCategory.LIST) { realisation.append(" * "); } else if(element.getParent().getCategory() == DocumentCategory.ENUMERATED_LIST) { realisation.append(numberedPrefix.getPrefix() + " - "); } } for (NLGElement eachComponent : components) { realisedComponent = realise(eachComponent); if (realisedComponent != null) { realisation.append(realisedComponent .getRealisation()); if(components.indexOf(eachComponent) < components.size()-1) { realisation.append(' '); } } } //finally, append newline realisation.append("\n"); break; } // also need to check if element is a ListElement (items can // have embedded lists post-orthography) or a coordinate } else if (element instanceof ListElement) { debug_println("***(4a) " + element); debug_println("***(4a) " + ((element instanceof ListElement)? "ListElement" : "CoordinatedPhraseElement") + " !!! " + element.getFeatureAsString(Constants.MARKUP_ELEMENT)); String markup=null; if (element instanceof ListElement) { if ((markup = element.getFeatureAsString(Constants.MARKUP_ELEMENT)) != null) { realisation.append(startMarkup(markup, element.getFeatureAsString(Constants.MARKUP_ATTRIBUTES))); } } boolean first=true; for (NLGElement eachComponent : components) { realisedComponent = realise(eachComponent); if (realisedComponent != null) { if (first) { first=false; } else { realisation.append(' '); } realisation.append(realisedComponent.getRealisation()); } } if (markup!=null) { if (element instanceof ListElement) { realisation.append(endMarkup(markup)); } } } else if (element instanceof CoordinatedPhraseElement) { debug_println("***(4) " + element); debug_println("***(4) " + ((element instanceof ListElement)? "ListElement" : "CoordinatedPhraseElement") + " !!! " + element.getFeatureAsString(Constants.MARKUP_ELEMENT)); String markup=null; if (element instanceof CoordinatedPhraseElement) { if ((markup = element.getFeatureAsString(Constants.MARKUP_ELEMENT)) != null) { realisation.append(startMarkup(markup, element.getFeatureAsString(Constants.MARKUP_ATTRIBUTES))); } } boolean first=true; int index=0; int length=components.size(); //System.out.println("length: " + length); for (NLGElement eachComponent : components) { //System.out.println("index " + index + " " + eachComponent); realisedComponent = realise(eachComponent); if (realisedComponent != null) { if (first) { first=false; } else { realisation.append(' '); } if ((index != length - 2) && DiscourseFunction.CONJUNCTION.equals(eachComponent.getFeature(InternalFeature.DISCOURSE_FUNCTION))) { //from OrthographyProcessor, line 434 realisation.append(", "); //$NON-NLS-1$ } else { realisation.append(realisedComponent.getRealisation()); } } index++; } if (markup!=null) { if (element instanceof CoordinatedPhraseElement) { realisation.append(endMarkup(markup)); } } realisation= fixPunctuationAndMarkup(realisation); } } return new StringElement(realisation.toString()); } public StringBuffer fixPunctuationAndMarkup(StringBuffer realisation) { return new StringBuffer(realisation.toString() .replace(" ,", ",") .replace(" ,", ", ") .replace(" and
  • ", " and
  • ")); } /** * Adds the sentence terminator to the sentence. This is a period ('.') for * normal sentences or a question mark ('?') for interrogatives. * * @param realisation * the StringBuffer containing the current * realisation of the sentence. * @param interrogative * a boolean flag showing true if the * sentence is an interrogative, false otherwise. */ private void terminateSentence(StringBuffer realisation, boolean interrogative) { char character = realisation.charAt(realisation.length() - 1); if(character != '.' && character != '?') { if(interrogative) { realisation.append('?'); } else { realisation.append('.'); } } } /** * Remove recursively any leading spaces or commas at the start * of a sentence. * * @param realisation * the StringBuffer containing the current * realisation of the sentence. */ private void stripLeadingCommas(StringBuffer realisation) { char character = realisation.charAt(0); if(character == ' ' || character == ',') { realisation.deleteCharAt(0); stripLeadingCommas(realisation); } } /** * Capitalises the first character of a sentence if it is a lower case * letter. * * @param realisation * the StringBuffer containing the current * realisation of the sentence. */ private void capitaliseFirstLetter(StringBuffer realisation) { char character = realisation.charAt(0); if(character >= 'a' && character <= 'z') { character = (char) ('A' + (character - 'a')); realisation.setCharAt(0, character); } } public static void debug_println(String s) { // /System.out.println(s); } public String addMarkupToRealisation(NLGElement element, String elementRealisation) { String s; if (((s=element.getFeatureAsString(Constants.MARKUP_ELEMENT))!=null) && s!="") { elementRealisation= startMarkup(s,element.getFeatureAsString(Constants.MARKUP_ATTRIBUTES) ) + elementRealisation + endMarkup(s); } return elementRealisation; } static public String endMarkup(String s) { return ""; } static public String startMarkup(String s, String attributes) { String toInsert = ((attributes==null) || ("".equals(attributes))) ? "" : " " + attributes; return "<" + s + toInsert + ">"; } /** * realiseSubComponents -- Realises subcomponents iteratively. * @param realisation -- The current realisation StringBuffer. * @param components -- The components to realise. */ private void realiseSubComponents(StringBuffer realisation, List components) { NLGElement realisedComponent; for (NLGElement eachComponent : components) { realisedComponent = realise(eachComponent); if (realisedComponent != null) { realisation.append(realisedComponent .getRealisation()); } } } /** * realiseSubComponents -- Realises subcomponents iteratively. * @param realisation -- The current realisation StringBuffer. * @param element * @param components -- The components to realise. */ private void realiseSubComponentsWithMarkup(StringBuffer realisation, NLGElement element, List components) { String markup; if (((markup=element.getFeatureAsString(Constants.MARKUP_ELEMENT))!=null) && (markup!="")) { realisation.append(startMarkup(markup,element.getFeatureAsString(Constants.MARKUP_ATTRIBUTES) )); } NLGElement realisedComponent; boolean first=true; for (NLGElement eachComponent : components) { realisedComponent = realise(eachComponent); propagateMarkup(element, realisedComponent); if (realisedComponent != null) { if (first) { first=false; } else { realisation.append(' '); } realisation.append(realisedComponent .getRealisation()); } } if ((markup!=null) && (markup!="")) { realisation.append(endMarkup(markup)); } } /** * appendTitle -- Appends document or section title to the realised document. * @param realisation -- The current realisation StringBuffer. * @param title -- The title to append. * @param numberOfLineBreaksAfterTitle -- Number of line breaks to append. */ private void appendTitle(StringBuffer realisation, String title, int numberOfLineBreaksAfterTitle) { if (title != null && !title.isEmpty()) { realisation.append(title); for(int i = 0; i < numberOfLineBreaksAfterTitle; i++) { realisation.append("\n"); } } } @Override public List realise(List elements) { List realisedList = new ArrayList(); if (elements != null) { for (NLGElement eachElement : elements) { realisedList.add(realise(eachElement)); } } return realisedList; } }