All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.force.i18n.grammar.parser.GrammaticalLabelFileHandler Maven / Gradle / Ivy

There is a newer version: 1.2.30
Show newest version
/*
 * Copyright (c) 2017, salesforce.com, inc.
 * All rights reserved.
 * Licensed under the BSD 3-Clause license.
 * For full license text, see LICENSE.txt file in the repo root  or https://opensource.org/licenses/BSD-3-Clause
 */

package com.force.i18n.grammar.parser;

import static com.force.i18n.commons.util.settings.IniFileUtil.intern;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.function.Function;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

import org.xml.sax.*;

import com.force.i18n.*;
import com.force.i18n.commons.text.DeferredStringBuilder;
import com.force.i18n.commons.text.TextUtil;
import com.force.i18n.grammar.*;
import com.force.i18n.grammar.parser.GrammaticalLabelFileParser.ErrorInfo;
import com.force.i18n.grammar.parser.GrammaticalLabelFileParser.ErrorType;
import com.force.i18n.settings.PropertyFileData;
import com.force.i18n.settings.TrackingHandler;
import com.google.common.collect.ImmutableSet;

/**
 * @author stamm
 */
class GrammaticalLabelFileHandler extends TrackingHandler {
    private static final Logger logger = Logger.getLogger(GrammaticalLabelFileHandler.class.getName());

    private static final boolean LOG_DUPLICATE_LABELS = "true".equals(I18nJavaUtil.getProperty("logDuplicateLabels"));  // Don't log duplicate as a matter of course because they are often duplicated to ease translation

    private final GrammaticalLabelFileParser parser;
    private final URL baseDir;
    private final PropertyFileData data;

    private BaseTag currentTag = null;
    private BaseTag currentParam = null;
    private SectionTag currentSection = null;


    /**
     * Package protected constructor used by the LabelParser.
     *
     * @param dataFile
     *            XML file this handler is reading.
     * @param parser
     *            parser that is using this handler and thus the overall parse context.
     */
    GrammaticalLabelFileHandler(URL dataFile, PropertyFileData data, GrammaticalLabelFileParser parser) {
    	super(dataFile);
        assert dataFile != null && data != null && parser != null;
        this.data = data;
        this.parser = parser;
        // Directory from which we're loading data.
        this.baseDir = dataFile;

        if (LabelDebug.isLabelHintAllowed()) {
            BASE_FILE = dataFile.getPath();
            this.sectionToFileName = parser.getSectionToFileName();
        }
    }

    final Level getProblemLogLevel() {
        return LanguageProviderFactory.get().getBaseLanguage() == parser.getDictionary().getLanguage() ? Level.INFO : Level.FINE; // TODO: This should turn into INFO;
    }

    final Level getSevereProblemLogLevel() {
        return LanguageProviderFactory.get().getBaseLanguage() == parser.getDictionary().getLanguage() ? Level.SEVERE : Level.FINE; // TODO: This should turn into INFO for other langs;
    }

    int getLineNumber() {
        Locator loc = super.getLocator();
        return loc == null ? 0 : loc.getLineNumber();
    }

    URL getFileURL() {
        return super.getFile();
    }

    /**
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.currentTag != null)
            this.currentTag.characters(ch, start, length);
    }

    /**
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
     *      org.xml.sax.Attributes)
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (this.currentTag != null) {
            this.currentTag.startElement(uri, localName, qName, attributes);
        } else if (ROOT.equals(localName)) {
            this.currentTag = new RootTag();
///CLOVER:OFF
        } else {
            throw new SAXNotSupportedException("bad tag:" + localName);
///CLOVER:ON
        }
    }

    /**
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (this.currentTag != null) {
            if (localName.equals(this.currentTag.getTagName())) {
                this.currentTag.endElement();
                this.currentTag = this.currentTag.getParent();
///CLOVER:OFF
            } else {
                logger.log(getProblemLogLevel(), "###\tBad end tag <" + localName + "> found, ignored: " + getLineNumberString());
///CLOVER:ON
            }
        }
    }

    // -----------------------------------------------------------------------------
    // Inner classes for each tag implementation
    //
    // -----------------------------------------------------------------------------
    // tag name
    static final String ROOT = "iniFile";
    static final String SECTION = "section";
    static final String PUBLIC = "public";
    static final String PARAM = "param";
    static final String IMPORT = "import";
    static final String IMAGE = "image";
    static final String ENTITY = "entity";
    static final String MOD = "modifier";
    static final String ESCAPE_HTML = "escapeHtml";
    static final String NUM = "num";
    static final String PLURAL = "plural";
    static final String GENDER = "gender";
    static final String COUNTER = "counter";  // Used for classifier
    static final String WHEN = "when";

    // attributes
    static final String NAME = "name";
    static final String ALIAS = "alias";
    static final String ERROR = "error";
    static final String TYPE = "type";
    static final String VAL = "val";
    static final String DEFAULT = "default";

    static final String YES = "y";
    static final String NO = "n";

    GrammaticalLabelFileParser getParser() { return this.parser; }
    LanguageDictionary getDictionary() { return getParser().getDictionary(); }

    /**
     * The base class for each tag implementation.
     */
    abstract class BaseTag {
        private BaseTag parent = null;
        private String name;
        private Attributes attributes;

        BaseTag() {}

        BaseTag(BaseTag parent, Attributes atts) throws SAXParseException {
            this.parent = parent;
            this.attributes = atts;
            if (atts != null) {
                String n = atts.getValue(NAME);
                this.name = (n != null) ? intern(n.trim()) : null;
///CLOVER:OFF
                if (this.name == null && isNameRequired())
                    throw new SAXParseException("Missing required attribuite:" + NAME, getLocator());
///CLOVER:ON
            }
        }

        final BaseTag getParent() {
            return this.parent;
        }

        abstract String getTagName();

        final String getName() {
            return this.name;
        }

        Attributes getAttributes() {
            return this.attributes;
        }

        /**
         * Pass through the SAX event for characters
         */
        void characters(char[] ch, int start, int length) {}

        /**
         * Pass through the SAX event for starting the element
         */
        void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {}

        /**
         * Pass through the SAX event for ending the last element
         */
        void endElement() {}

        /**
         * @return whether the name attribute is required for this tag
         */
        boolean isNameRequired() {
            return false;
        }

        /**
         * hook to remember enclosing tags
         */
        void addTag(RefTag tag) {}

    }

    class BadTag extends BaseTag {
        private final String tagName;

        BadTag(BaseTag parent, String localName) throws SAXParseException {
            super(parent, null);
            this.tagName = localName;
            logger.log(getProblemLogLevel(), "###\tBad tag <" + localName + "> found, ignored: " + getLineNumberString());
        }

        @Override
        String getTagName() {
            return this.tagName;
        }
    }

    /**
     * The root tag - <iniFile>. The root tag can only contain <section> or <import> tag
     */
    private class RootTag extends BaseTag {
        @Override
        String getTagName() {
            return ROOT;
        }

        @Override
        void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (SECTION.equals(localName)) {
                currentTag = new SectionTag(this, attributes);

            } else if (IMPORT.equals(localName)) {
                try {
                    currentTag = new ImportTag(this, attributes);
                } catch (MalformedURLException e) {
                    throw new IllegalArgumentException(e);
                }

            } else {
                currentTag = new BadTag(this, localName);
            }
        }
    }

    private class ImportTag extends BaseTag {
        @Override
        String getTagName() {
            return IMPORT;
        }

        ImportTag(RootTag parent, Attributes atts) throws SAXParseException, MalformedURLException {
            super(parent, atts);

            // Fire up the new parser and populate the inifile with the retrieved data.
            getParser().parseLabels(GrammaticalLabelFileHandler.this.data, new URL(baseDir, atts.getValue(0)));
        }
    }

    /**
     * The section tag - <section>. The section tag contains either <param> or <img> tag
     */
    private class SectionTag extends BaseTag {
        @Override
        String getTagName() {
            return SECTION;
        }

        SectionTag(RootTag parent, Attributes atts) throws SAXParseException {
            super(parent, atts);

            if (LabelDebug.isLabelHintAllowed()) {
                sectionToFileName.put(getName(), BASE_FILE);
            }

            currentSection = this;
            boolean isSectionPublic = "true".equalsIgnoreCase(atts.getValue(PUBLIC));
            if (isSectionPublic) {
                GrammaticalLabelFileHandler.this.data.setSectionAsPublic(this.getName());
            }
        }

        @Override
        void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (PARAM.equals(localName)) {
                currentTag = new ParamTag(this, attributes);
            } else {
                currentTag = new BadTag(this, localName);
            }
        }

        @Override
        void endElement() {
            currentSection = null;
        }

        @Override
        boolean isNameRequired() {
            return true;
        }

        void addLabelData(String name, Object data) {
///CLOVER:OFF
            if (LOG_DUPLICATE_LABELS) {
                Object existingValue = GrammaticalLabelFileHandler.this.data.put(getName(), name, data);
                if (existingValue != null) { // TODO: Chinese is stored with a zh_TW being the default
                    HumanLanguage lang = LanguageProviderFactory.get().getProvider().getLanguage(GrammaticalLabelFileHandler.this.data.getLocale());
                    // TODO SLT: fix this.
                    if (!ImmutableSet.of(LanguageConstants.CHINESE_TW, LanguageConstants.SPANISH_MX).contains(lang.getLocaleString())) {
                        logger.log(getProblemLogLevel(), "Redundant label found: " + name + " in section: " + getName()
                                + ", Existing Value: " + existingValue);
                    }
                }
///CLOVER:ON
            } else {
                // Java compiler doesn't optimize away a dead local store if the user of the variable is inside a dead-code optimized block.
                GrammaticalLabelFileHandler.this.data.put(getName(), name, data);
            }
        }
    }

    /**
     * The param tag - <param name="param1">. The param tag takes three attributes
     * 
    *
  • name: parameter name. this is always required. *
  • alias: Use alias to make this param refers to the other label. e.g. <param name="val1" * alias="sec1.val1"/> *
  • entity: specify default entity name. This is used if modifier tag has no entity attribute specified, * and if there is no nouns defined. *
*/ private class ParamTag extends StringTag { @Override String getTagName() { return PARAM; } ParamTag(SectionTag parent, Attributes atts) throws SAXParseException { super(parent, atts); currentParam = this; } @Override boolean isNameRequired() { return true; } @Override void endElement() { super.endElement(); currentParam = null; } @Override void addLabelDataToParent(Object data) { ((SectionTag)getParent()).addLabelData(getName(), data); } } /** * The param tag - <param name="param1">. The param tag takes three attributes *
    *
  • name: parameter name. this is always required. *
  • alias: Use alias to make this param refers to the other label. e.g. <param name="val1" * alias="sec1.val1"/> *
  • entity: specify default entity name. This is used if modifier tag has no entity attribute specified, * and if there is no nouns defined. *
*/ private abstract class StringTag extends BaseTag { private boolean isAlias; private StringBuilder sb; private final ArrayList values; // the type is specific (eg not List<>) to allow cloning StringTag(BaseTag parent, Attributes atts) throws SAXParseException { super(parent, atts); this.values = new ArrayList<>(); this.isAlias = false; // if alias is specified, ignore all remaining attributes/contents String alias = atts.getValue(ALIAS); if (alias != null) { int i = alias.indexOf('.'); if (i <= 0) logger.log(getProblemLogLevel(), "###\tBad alias name " + alias + " at " + parent.getName() + "." + getName() + " in " + getDictionary().getLanguage()); else { this.isAlias = true; getParser().addAlias(parent.getName(), getName(), alias.substring(0, i), alias.substring(i + 1), getFile(), getLocator().getLineNumber()); } } if (!this.isAlias) { // make sure we no longer have alias on it. This is for the case that english label // has this param as an alias, but overriding file has not. getParser().removeAlias(parent.getName(), getName()); } } private void addBufferedText() { String s = convertEscapedToUnicode(this.sb.toString()); // If we ever wanted to keep large labels on disk/out of memory, // this would be great place to inject something. Perhaps storing // in sleepycat or somesuch thing. s = getParser().uniquefy(s); this.values.add(s); } @Override void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // if this is alias specified, ignore any internal tag if (this.isAlias) return; // push buffered text into array if (this.sb != null && this.sb.length() > 0) { addBufferedText(); this.sb = null; } String lowerName = localName.toLowerCase(); // or if (ENTITY.equals(lowerName)) { currentTag = new NounTag(this, attributes, localName, true); } else if (PLURAL.equals(lowerName)) { currentTag = new PluralTag(this, attributes, localName); } else if (GENDER.equals(lowerName)) { currentTag = new GenderTag(this, attributes, localName); } else if (getParser().getDictionary().isAdjective(lowerName)) { currentTag = new AdjTag(this, attributes, localName); } else if (getParser().getDictionary().isArticle(lowerName)) { currentTag = new ArtTag(this, attributes, localName); } else if (COUNTER.equals(lowerName)) { currentTag = new CounterTag(this, attributes, localName); } else { currentTag = new NounTag(this, attributes, localName, false); } } @Override void characters(char[] ch, int start, int length) { if (!isAlias) { if (this.sb == null) this.sb = new StringBuilder(); this.sb.append(ch, start, length); } } /** * Add the label data to the parent object * @param data */ abstract void addLabelDataToParent(Object data); @Override void endElement() { if (this.isAlias) { // register to name only - leave as null at this point addLabelDataToParent(null); } else { // push any remaining string if (this.sb != null && this.sb.length() > 0) { if (getParser().trackDupes()) { getParser().trackLabel(this.sb.toString(), getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName()); } addBufferedText(); } if (values.isEmpty()) { addLabelDataToParent(""); } else { if (this.values.size() == 1) { addLabelDataToParent(this.values.get(0)); } else { List rawData = finishParsingLabelsNew(this.values); addLabelDataToParent(rawData); } } } } protected List finishParsingLabelsNew(List values) { // The goal here is to determine noun phrases correctly. We do that by assuming that noun phrases are of the following form // [article] [adjective]+ noun [adjective]+ // We need to: // Fix up the articles based on the noun and the next term // Fix up the adjective based on the noun, the next term, and the article (whether from an article tag or from the noun) // Possibly fix up the noun based on the article tag for those languages that have a distinct article particle and a distinct form based on definitiveness // So there's a two pass algorithm. In the first, we figure out the "noun" phrases List phrases = new ArrayList<>(); NounPhrase curPhrase = new NounPhrase(values); for (int i = 0; i < values.size(); i++) { Object o = values.get(i); if (o instanceof ArticleRefTag) { if (curPhrase.isNounSet()) { curPhrase = new NounPhrase(values); // Demarcate as a new phrase } curPhrase.setArticleLoc(i); } else if (o instanceof NounRefTag) { if (curPhrase.isNounSet()) { curPhrase = new NounPhrase(values); // New noun phrase } phrases.add(curPhrase); // Once there's a noun, there's a noun phrase curPhrase.setNounLoc(i); } else if (o instanceof AdjectiveRefTag) { AdjectiveRefTag refTag = (AdjectiveRefTag)o; Adjective adj = getDictionary().getAdjective(refTag.getName()); LanguagePosition pos = refTag.getDeclensionOverrides().getPosition(); // Allow for position overrides in case it's "different" if (pos == null) pos = adj.getPosition(); if (pos == LanguagePosition.PRE && curPhrase.isNounSet()) { curPhrase = new NounPhrase(values); // Demarcate as a new phrase } if (adj.isCopiedFromDefault()) { logger.log(getSevereProblemLogLevel(), "Adjective copied from english for " + getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName()); } // add to the last noun phrase curPhrase.addAdjectiveLoc(i); } else if (o instanceof AdnominalRefTag) { if (curPhrase.isNounSet()) { curPhrase = new NounPhrase(values); // Demarcate as a new phrase } curPhrase.addChoiceLoc(i); } } if (!curPhrase.isNounSet()) { if (phrases.isEmpty()) { logger.log(getSevereProblemLogLevel(), "Adjective used without a noun for " + getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName()); return values; // Don't deal } else { // Chances are we have a dangling adjective (like ()) if (phrases.size() > 1) { // Log only if there's an actual issue (multiple nouns) logger.log(getSevereProblemLogLevel(), "Prepositional Adjective used in post-position for " + getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName()); } // Add the adjectives to the last noun phrase and keep going. NounPhrase lastPhrase = phrases.get(phrases.size()-1); for (Integer i : curPhrase.getAdjectiveLocs()) { lastPhrase.addAdjectiveLoc(i); } for (Integer i : curPhrase.getChoiceLocs()) { lastPhrase.addChoiceLoc(i); } } } boolean isEndsWithLanguage = getDictionary().getDeclension().hasEndsWith(); for (NounPhrase phrase : phrases) { Object o = values.get(phrase.nounLoc); assert o instanceof NounRefTag : "Invalid noun location " + phrase.nounLoc + " in " + phrase; NounRefTag nounRef = phrase.getNounRefTag(); ArticleRefTag articleRef = phrase.getArticleRefTag(); // Handle the article if (articleRef != null) { // Fix up the article of the noun based on the particle if it's interesting. if (getDictionary().getDeclension().shouldInferNounDefArticleFromParticle()) { LanguageArticle articleType = ((Article)articleRef.resolveModifier(getDictionary())).getArticleType(); // Update the noun with the article if (articleType == LanguageArticle.DEFINITE) { nounRef = nounRef.makeArticled(getDictionary(), articleType); values.set(phrase.getNounLoc(), nounRef); } } // Figure out the "next thing" if (phrase.hasAdjectives()) { int nextAdjective = phrase.getAdjectiveLoc(isEndsWithLanguage ? -1 : 0); values.set(phrase.getArticleLoc(), articleRef.fixupModifier(nounRef, nextAdjective < phrase.getNounLoc() ? (TermRefTag) values.get(nextAdjective) : nounRef)); } else { values.set(phrase.getArticleLoc(), articleRef.fixupModifier(nounRef, nounRef)); } } // Handle special case for bulgarian where all noun inflections should move to the first modifier automatically if there is a modifier // Modify the noun to have no inflection, and move the modifiers back to the adjective ref tag. if (getDictionary().getDeclension().moveNounInflectionToFirstModifier() && phrase.hasAdjectives()) { NounRefTag nounTag = phrase.getNounRefTag(); NounForm nounForm = nounTag.getForm(); AdjectiveRefTag firstAdjectiveTag = (AdjectiveRefTag) values.get(phrase.getAdjectiveLoc(0)); if (nounForm.getArticle() != LanguageArticle.ZERO || nounForm.getCase() != LanguageCase.NOMINATIVE) { nounRef = nounTag.makeUninflected(getDictionary()); values.set(phrase.getNounLoc(), nounRef); values.set(phrase.getAdjectiveLoc(0), firstAdjectiveTag.fixupModifier(nounRef, nounRef, nounForm)); } } // Now go through the adjectives. LanguageArticle articleOverride = articleRef != null ? getDictionary().getArticle(articleRef.getName()).getArticleType() : null; if (isEndsWithLanguage) { for (int i = phrase.getAdjectiveLocs().size()-1; i >= 0; i--) { AdjectiveRefTag adjTag = (AdjectiveRefTag) values.get(phrase.getAdjectiveLoc(i)); // Figure out the "next" object if (i > 0) { int prevAdjective = phrase.getAdjectiveLoc(i-1); values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, prevAdjective > phrase.getNounLoc() ? (TermRefTag) values.get(prevAdjective) : nounRef, articleOverride)); } else { // TODO: Not sure this is right. values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, nounRef, articleOverride)); } } } else { for (int i = 0; i < phrase.getAdjectiveLocs().size(); i++) { AdjectiveRefTag adjTag = (AdjectiveRefTag) values.get(phrase.getAdjectiveLoc(i)); // Figure out the "next" object if (i < phrase.getAdjectiveLocs().size() - 1) { int nextAdjective = phrase.getAdjectiveLoc(i+1); values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, nextAdjective < phrase.getNounLoc() ? (TermRefTag) values.get(nextAdjective) : nounRef, articleOverride)); } else { values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, nounRef, articleOverride)); } } } // Now do choice phrases for (int i = 0; i < phrase.getChoiceLocs().size(); i++) { AdnominalRefTag choiceTag = (AdnominalRefTag) values.get(phrase.getChoiceLoc(i)); values.set(phrase.getChoiceLoc(i), choiceTag.getWithResolvedNounTag(nounRef)); } } return values; } @Override void addTag(RefTag tag) { values.add(tag); } } /** * Represents the locations of various elements of a noun phrase in a label. * @author stamm */ static class NounPhrase { private final List values; private int nounLoc = -1; private int articleLoc = -1; private List adjectiveLocs = new ArrayList<>(); private List choiceLocs = new ArrayList<>(); public NounPhrase(List values) { this.values = values; } public NounRefTag getNounRefTag() { return nounLoc == -1 ? null : (NounRefTag) values.get(nounLoc); } public ArticleRefTag getArticleRefTag() { return articleLoc == -1 ? null : (ArticleRefTag) values.get(articleLoc); } public void setNounLoc(int i) { this.nounLoc = i; assert values.get(i) instanceof NounRefTag : "Illegal noun loc"; } public void setArticleLoc(int i) { this.articleLoc = i; assert values.get(i) instanceof ArticleRefTag : "Illegal article loc"; } public int getArticleLoc() { return this.articleLoc; } public int getNounLoc() { return this.nounLoc; } public void addAdjectiveLoc(int i) { this.adjectiveLocs.add(i); assert values.get(i) instanceof AdjectiveRefTag : "Illegal adjective loc"; } public List getAdjectiveLocs() { return this.adjectiveLocs; } public int getAdjectiveLoc(int offset) { if (offset == -1) { // Support -1 to get the "Last" adjective return this.adjectiveLocs.get(this.adjectiveLocs.size()-1); } return this.adjectiveLocs.get(offset); } public void addChoiceLoc(int i) { this.choiceLocs.add(i); assert values.get(i) instanceof AdnominalRefTag : "Illegal choice loc"; } public List getChoiceLocs() { return this.choiceLocs; } public int getChoiceLoc(int offset) { if (offset == -1) { // Support -1 to get the "Last" adjective return this.choiceLocs.get(this.choiceLocs.size()-1); } return this.choiceLocs.get(offset); } public boolean isNounSet() { return nounLoc != -1; } public boolean hasAdjectives() { return !this.adjectiveLocs.isEmpty(); } public boolean hasChoices() { return !this.choiceLocs.isEmpty(); } public int getPhraseStart() { if (articleLoc != -1) return articleLoc; if (adjectiveLocs.isEmpty()) return nounLoc; return Math.min(adjectiveLocs.get(0), nounLoc); } public int getPhraseEnd() { if (adjectiveLocs.isEmpty()) return nounLoc; return Math.max(adjectiveLocs.get(adjectiveLocs.size()-1), nounLoc); } @Override public String toString() { return values.subList(getPhraseStart(), getPhraseEnd() + 1).toString(); } } /** * Entity or any noun tag - e.g. <Accounts/>, <Account_Name/>, <Entity ref="0"/> */ private class NounTag extends BaseTag { private final NounRefTag nounTag; private final String tagName; @Override String getTagName() { return this.tagName; } NounTag(StringTag parent, Attributes atts, String localName, boolean isEntityTag) throws SAXParseException { super(parent, atts); this.tagName = localName; String entityName = isEntityTag ? atts.getValue(ENTITY) : localName; String entityAttr = isEntityTag ? localName : null; this.nounTag = constructNounTag(entityName, atts, entityAttr); if (this.nounTag == null) { ErrorInfo error = parser.addInvalidLabel(ErrorType.UnknownEntity, currentSection.getName(), currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(), GrammaticalLabelFileHandler.this.getLineNumber(), entityName); logger.log(getProblemLogLevel(), "###\t" + error.getMessage()); } } ///CLOVER:OFF @Override void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // no inner tag is allowed currentTag = new BadTag(this, localName); } ///CLOVER:ON @Override void endElement() { if (this.nounTag != null) getParent().addTag(this.nounTag); } } /** * Adjective tag. <New entity="Accounts"/> */ class AdjTag extends BaseTag { AdjectiveRefTag modTag; private final String tagName; @Override String getTagName() { return this.tagName; } AdjTag(StringTag parent, Attributes atts, String localName) throws SAXParseException { super(parent, atts); this.tagName = localName; String lowerName = tagName.toLowerCase(); // handle case String entityName = atts.getValue(ENTITY); NounRefTag refTag = constructNounTag(entityName, atts, entityName == null || !Character.isDigit(entityName.charAt(0)) ? null : entityName); TermAttributes attributes = new TermAttributes(getDictionary().getDeclension(), atts, false); this.modTag = AdjectiveRefTag.getAdjectiveRefTag(lowerName, refTag, refTag, Character.isUpperCase(tagName.charAt(0)), attributes); } ///CLOVER:OFF @Override void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // no inner tag is allowed currentTag = new BadTag(this, localName); } ///CLOVER:ON @Override void endElement() { if (this.modTag != null) getParent().addTag(this.modTag); } } /** * Adjective tag. <New entity="Accounts"/> */ class ArtTag extends BaseTag { ArticleRefTag modTag; private final String tagName; @Override String getTagName() { return this.tagName; } ArtTag(StringTag parent, Attributes atts, String localName) throws SAXParseException { super(parent, atts); this.tagName = localName; String lowerName = tagName.toLowerCase(); // handle case String entityName = atts.getValue(ENTITY); NounRefTag refTag = constructNounTag(entityName, atts, entityName == null || !Character.isDigit(entityName.charAt(0)) ? null : entityName); TermAttributes attributes = new TermAttributes(getDictionary().getDeclension(), atts, false); this.modTag = ArticleRefTag.getArticleRefTag(lowerName, refTag, refTag, Character.isUpperCase(tagName.charAt(0)), attributes); } @Override void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // no inner tag is allowed currentTag = new BadTag(this, localName); } @Override void endElement() { if (this.modTag != null) getParent().addTag(this.modTag); } } /** * Conter tag. <Counter/> */ class CounterTag extends BaseTag { @Override String getTagName() { return COUNTER; } CounterTag(StringTag parent, Attributes atts, String localName) throws SAXParseException { super(parent, atts); } ///CLOVER:OFF @Override void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // no inner tag is allowed currentTag = new BadTag(this, localName); } ///CLOVER:ON @Override void endElement() { getParent().addTag(new CounterRefTag(null)); } } /** * Represents a tag that can have "when" tags, or, when the when's are unspecified, a default value. */ private abstract class ChoiceTag> extends StringTag { private Map perCategory; private Object defaultVal; // Some choices have a default that is a specific categories, others are explicit based on the resolution of the noun. ChoiceTag(StringTag parent, Attributes atts, String localName, Class type) throws SAXParseException { super(parent, atts); perCategory = new EnumMap<>(type); defaultVal = null; } @Override void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (localName.equalsIgnoreCase(WHEN)) { currentTag = new WhenTag(this, attributes, localName); } else { super.startElement(uri, localName, qName, attributes); } } void addLabelData(E val, Object data) { if (val == null) return; // This means we were invalid Object previous = perCategory.put(val, data); if (previous != null) { ErrorInfo error = parser.addInvalidLabel(ErrorType.DuplicateWhen, currentSection.getName(), currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(), GrammaticalLabelFileHandler.this.getLineNumber(), val); logger.log(getProblemLogLevel(), "###\t" + error.getMessage()); } } /** * @return the default category to use for the unadorned tags, or when with isDefault="true" * Can be null */ abstract E getDefaultCategory(); abstract Function getCategoryFromLabel(); void setDefault(Object data) { if (getDefaultCategory() == null) { this.defaultVal = data; } else { if (perCategory.containsKey(getDefaultCategory())) { ErrorInfo error = parser.addInvalidLabel(ErrorType.BadDefault, currentSection.getName(), currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(), GrammaticalLabelFileHandler.this.getLineNumber(), getDefaultCategory()); logger.log(getProblemLogLevel(), "###\t" + error.getMessage()); } else { addLabelData(getDefaultCategory(), data); } } } Object getDefault() { return getDefaultCategory() != null ? getPerCategory().get(getDefaultCategory()) : this.defaultVal; } Map getPerCategory() { return perCategory; } @Override void addLabelDataToParent(Object data) { // If it's empty string or just whitespace, meh. if (data == null || (data instanceof String && TextUtil.isNullEmptyOrWhitespace((String)data))) return; setDefault(data); } } /** * Plural tag with choices <plural val="0"> with embedded choices. The "non" when tags */ private class PluralTag extends ChoiceTag { private final int num; @Override String getTagName() { return PLURAL; } PluralTag(StringTag parent, Attributes atts, String localName) throws SAXParseException { super(parent, atts, localName, PluralCategory.class); int _num; try { _num = Integer.parseInt(atts.getValue(NUM)); } catch (NumberFormatException ex) { _num = 0; ErrorInfo error = parser.addInvalidLabel(ErrorType.BadPluralReference, currentSection.getName(), currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(), GrammaticalLabelFileHandler.this.getLineNumber(), atts.getValue(NUM)); logger.log(getProblemLogLevel(), "###\t" + error.getMessage()); } num = _num; } @Override PluralCategory getDefaultCategory() { return PluralCategory.OTHER; } @Override Function getCategoryFromLabel() { return a -> PluralCategory.fromLabel(a); } @Override void endElement() { super.endElement(); getParent().addTag(new PluralRefTag(num, getPerCategory(), getDefault())); } } /** * Plural tag with choices <plural val="0"> with embedded choices. The "non" when tags */ private class GenderTag extends ChoiceTag { @Override String getTagName() { return GENDER; } GenderTag(StringTag parent, Attributes atts, String localName) throws SAXParseException { super(parent, atts, localName, LanguageGender.class); } @Override LanguageGender getDefaultCategory() { return null; } @Override Function getCategoryFromLabel() { return a -> LanguageGender.fromLabelValue(a); } @Override void endElement() { super.endElement(); getParent().addTag(new GenderRefTag(null, getPerCategory(), getDefault())); } } /** * when tag to specify choices <when ="0"> with embedded choices */ private class WhenTag> extends StringTag { private final List categories; @Override String getTagName() { return WHEN; } WhenTag(ChoiceTag parent, Attributes atts, String localName) throws SAXParseException { super(parent, atts); boolean isDefault; List val = TextUtil.splitSimple(",", atts.getValue(VAL)); isDefault = YES.equalsIgnoreCase(atts.getValue(DEFAULT)); List newCategories = val != null ? val.stream().map(parent.getCategoryFromLabel()).collect(Collectors.toList()) : null; if (newCategories == null || newCategories.stream().anyMatch(a -> a == null)) { if (isDefault && newCategories == null) { newCategories = Collections.singletonList(parent.getDefaultCategory()); } else { ErrorInfo error = parser.addInvalidLabel(ErrorType.BadCategory, currentSection.getName(), currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(), GrammaticalLabelFileHandler.this.getLineNumber(), val); logger.log(getProblemLogLevel(), "###\t" + error.getMessage()); newCategories = Collections.emptyList(); } } this.categories = newCategories; } @Override void addLabelDataToParent(Object data) { @SuppressWarnings("unchecked") ChoiceTag parent = (ChoiceTag)getParent(); for (E category : categories) { if (category == null) { // Was default parent.setDefault(data); } else { parent.addLabelData(category, data); } } } } NounRefTag constructNounTag(String entityName, Attributes atts, String entityElement) { boolean isCapital = false; Integer ref = null; TermAttributes ta = new TermAttributes(getDictionary().getDeclension(), atts); String realEntityName = entityName; // entityName could be null if this called for constructing ModTag if (entityElement != null) { isCapital = Character.isUpperCase(entityElement.charAt(0)); realEntityName = ENTITY; } else if (entityName != null) { isCapital = Character.isUpperCase(realEntityName.charAt(0)); } String escapeHtmlStr = atts.getValue(ESCAPE_HTML); boolean escapeHtml = "true".equals(escapeHtmlStr) || "y".equals(escapeHtmlStr); if (entityName != null) { String lowerName = realEntityName.toLowerCase(); // Try and get the noun Noun n = getParser().getDictionary().getNoun(lowerName, false); // Get the form for the noun. NounForm nid = ta.getExactNounForm(); if (nid == null) { // Log a different error for articles vs non articles if (ta.getArticle() != LanguageArticle.ZERO) { logger.finest("###\tNoun form " + ta + " at " + currentSection.getName() + "." + currentParam.getName() + " uses antiquated article form. Stop it."); } else { logger.log(getProblemLogLevel(), "###\tNoun form " + ta + " at " + currentSection.getName() + "." + currentParam.getName() + " not defined for this type of language"); } nid = ta.getApproximateNounForm(); } // first test if this is for custom entities. They need to keep track of the reference id if (getParser().getDictionary().isCustom(lowerName)) { // takes entity="0", "1", ... in addition to entityName String testRef = atts.getValue(ENTITY); try { ref = Integer.parseInt(testRef); } catch (Exception ex) { // Yeah, this should be NPE || NumberFormatException. This is simpler. } if (ref == null) { logger.log(getProblemLogLevel(), "###\tCustom entity <" + entityName + "> at " + currentSection.getName() + "." + currentParam.getName() + " must have entity attribute"); return null; } return NounRefTag.getNounTag(realEntityName, ref, isCapital, escapeHtml, nid); } // OK, we have a "real" noun for it. if ( n != null) { // this is %entity/%compoundNouns - check plural="y" in case someone set return NounRefTag.getNounTag(realEntityName, null, isCapital, escapeHtml, nid); } // See if it's a lowercase alias. n = getParser().getDictionary().getNounByPluralAlias(lowerName, false); if ( n != null ) { // Get the "correct" term attribute based on plural overrides NounForm overrideForm = getDictionary().getDeclension().getExactNounForm(LanguageNumber.PLURAL, nid.getCase(), nid.getPossessive(), nid.getArticle()); if (overrideForm == null) { // Look for legacy article forms... if (ta.getArticle() != LanguageArticle.ZERO && getDictionary().getDeclension().hasArticle() && !getDictionary().getDeclension().hasArticleInNounForm()) { logger.finest("###\tNoun form " + ta + " at " + currentSection.getName() + "." + currentParam.getName() + " uses antiquated article form. Stop it."); overrideForm = getDictionary().getDeclension().getApproximateNounForm(LanguageNumber.PLURAL, nid.getCase(), nid.getPossessive(), nid.getArticle()); } } return NounRefTag.getNounTag(n.getName(), null, isCapital, escapeHtml, overrideForm); } } return null; } /** * Convert a Unicode-escaped String (uXXXX) into Unicode. * * @throws IllegalArgumentException * If there is a substring starting with \\uXXXX where the value of XXXX is an invalid one * for unicode. */ public static String convertEscapedToUnicode(String str) { int len = str.length(); DeferredStringBuilder buf = new DeferredStringBuilder(str); for (int i = 0; i < len; ++i) { char c = str.charAt(i); if (c != '\\') { buf.append(c); } else { // found an escape char '\\' at the end of the string if (++i >= len) { buf.append('\\'); break; } c = str.charAt(i); // look at next character switch (c) { case 't': buf.append('\t'); break; case 'r': buf.append('\r'); break; case 'n': buf.append('\n'); break; case 'f': buf.append('\f'); break; case 'u': try { ++i; // need to look at XXXX portion of \\uXXXX escape sequence buf.append((char)Integer.parseInt(str.substring(i, i + 4), 16)); i += 3; } catch (NullPointerException | StringIndexOutOfBoundsException | NumberFormatException x) { throw new IllegalArgumentException("Malformed \\uxxxx encoding at position " + (i - 2) + " in " + str); } break; default: buf.append(c); } } } return buf.toString(); } // ----------------------------------------------------------- // Label Debugger support // ---------------------------------------------------------- private String BASE_FILE = null; private Map sectionToFileName; Map getSectionToFileName() { return this.sectionToFileName; } }