Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download or modify it as often as you want.
com.force.i18n.grammar.parser.GrammaticalLabelFileHandler Maven / Gradle / Ivy
Go to download
Localization Framework that allows grammatically correct renaming of nouns
* Copyright (c) 2017, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root.
package com.force.i18n.grammar.parser;
import static com.force.i18n.commons.util.settings.IniFileUtil.intern;
import java.util.*;
import java.util.function.Function;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.xml.sax.*;
import com.force.i18n.*;
import com.force.i18n.commons.text.DeferredStringBuilder;
import com.force.i18n.commons.text.TextUtil;
import com.force.i18n.grammar.*;
import com.force.i18n.grammar.parser.GrammaticalLabelFileParser.ErrorInfo;
import com.force.i18n.grammar.parser.GrammaticalLabelFileParser.ErrorType;
import com.force.i18n.settings.PropertyFileData;
import com.force.i18n.settings.TrackingHandler;
* @author stamm
class GrammaticalLabelFileHandler extends TrackingHandler {
// Logger shared by the handler and its inner tag classes.
private static final Logger logger = Logger.getLogger(GrammaticalLabelFileHandler.class.getName());
private static final boolean LOG_DUPLICATE_LABELS = "true".equals(I18nJavaUtil.getProperty("logDuplicateLabels")); // Don't log duplicate as a matter of course because they are often duplicated to ease translation
// Parser that drives this handler; used for the dictionary, alias bookkeeping and error reporting.
private final GrammaticalLabelFileParser parser;
// Context URL against which <import> targets are resolved (the constructor stores the data file URL here).
private final URL baseDir;
private final PropertyFileData data;
// Innermost tag currently being processed; null until the root element has been seen.
private BaseTag currentTag = null;
// The <param> currently being parsed; set by ParamTag's constructor and cleared by its endElement().
private BaseTag currentParam = null;
// The <section> currently being parsed; set by SectionTag's constructor and cleared by its endElement().
private SectionTag currentSection = null;
* Package protected constructor used by the LabelParser.
* @param dataFile
* XML file this handler is reading.
* @param parser
* parser that is using this handler and thus the overall parse context.
// Package-private constructor: asserts that all three arguments are non-null, then records the
// parser and uses the data file URL as the base for resolving imports.
GrammaticalLabelFileHandler(URL dataFile, PropertyFileData data, GrammaticalLabelFileParser parser) {
// NOTE(review): the trailing "= data;" has no left-hand side in this listing; presumably it
// assigned the "data" field — confirm against the upstream file.
assert dataFile != null && data != null && parser != null; = data;
this.parser = parser;
// Directory from which we're loading data.
this.baseDir = dataFile;
// Label-hint debugging support: remember the path of the file being parsed.
if (LabelDebug.isLabelHintAllowed()) {
BASE_FILE = dataFile.getPath();
// Share the parser's section-name to file-name map.
this.sectionToFileName = parser.getSectionToFileName();
// Log level for ordinary label problems: INFO when the dictionary being parsed belongs to the
// provider's base language, FINE for any other language.
final Level getProblemLogLevel() {
return LanguageProviderFactory.get().getBaseLanguage() == parser.getDictionary().getLanguage() ? Level.INFO : Level.FINE; // TODO: This should turn into INFO;
// Log level for serious label problems: SEVERE when the dictionary being parsed belongs to the
// provider's base language, FINE for any other language.
final Level getSevereProblemLogLevel() {
return LanguageProviderFactory.get().getBaseLanguage() == parser.getDictionary().getLanguage() ? Level.SEVERE : Level.FINE; // TODO: This should turn into INFO for other langs;
// Returns the line number reported by the superclass's SAX locator, or 0 when no locator is available.
int getLineNumber() {
Locator loc = super.getLocator();
return loc == null ? 0 : loc.getLineNumber();
// Returns the URL of the file being parsed, as tracked by the superclass.
URL getFileURL() {
return super.getFile();
* @see org.xml.sax.ContentHandler#characters(char[], int, int)
// SAX callback for character data: forwarded to the current tag, or dropped when no tag is open.
public void characters(char[] ch, int start, int length) throws SAXException {
if (this.currentTag != null)
this.currentTag.characters(ch, start, length);
* @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
* org.xml.sax.Attributes)
// SAX callback for an opening element. While a tag is open, the event is delegated to it (the tag
// decides which child tag becomes current). Otherwise only the root element is accepted; any
// other element name raises SAXNotSupportedException.
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if (this.currentTag != null) {
this.currentTag.startElement(uri, localName, qName, attributes);
} else if (ROOT.equals(localName)) {
this.currentTag = new RootTag();
} else {
throw new SAXNotSupportedException("bad tag:" + localName);
* @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
// SAX callback for a closing element. When the name matches the current tag, the current tag
// is popped back to its parent; a mismatched name is logged and otherwise ignored.
// NOTE(review): no call to the tag's endElement() hook is visible in this listing — confirm
// against the upstream file whether one belongs before the pop.
public void endElement(String uri, String localName, String qName) throws SAXException {
if (this.currentTag != null) {
if (localName.equals(this.currentTag.getTagName())) {
this.currentTag = this.currentTag.getParent();
} else {
logger.log(getProblemLogLevel(), "###\tBad end tag <" + localName + "> found, ignored: " + getLineNumberString());
// -----------------------------------------------------------------------------
// Inner classes for each tag implementation
// -----------------------------------------------------------------------------
// tag name
// Element names recognised in a label file. StringTag lower-cases the local name before
// comparing it against ENTITY, PLURAL, GENDER and COUNTER.
static final String ROOT = "iniFile";
static final String SECTION = "section";
static final String PUBLIC = "public";
static final String PARAM = "param";
static final String IMPORT = "import";
static final String IMAGE = "image";
static final String ENTITY = "entity";
static final String MOD = "modifier";
static final String ESCAPE_HTML = "escapeHtml";
static final String NUM = "num";
static final String PLURAL = "plural";
static final String GENDER = "gender";
static final String COUNTER = "counter"; // Used for classifier
static final String WHEN = "when";
// attributes
static final String NAME = "name";
static final String ALIAS = "alias";
static final String ERROR = "error";
static final String TYPE = "type";
static final String VAL = "val";
static final String DEFAULT = "default";
// Attribute values; YES is what WhenTag compares the "default" attribute against.
static final String YES = "y";
static final String NO = "n";
// Returns the parser that created this handler.
GrammaticalLabelFileParser getParser() { return this.parser; }
// Convenience accessor for the parser's language dictionary.
LanguageDictionary getDictionary() { return getParser().getDictionary(); }
* The base class for each tag implementation.
// Base class of every tag handler. Each instance links to its enclosing tag (parent), keeps the
// SAX attributes it was created with, and exposes no-op hooks that subclasses override.
abstract class BaseTag {
// Enclosing tag; stays null for tags created with the no-arg constructor (the root).
private BaseTag parent = null;
private String name;
private Attributes attributes;
BaseTag() {}
// Records parent and attributes and, when attributes are present, reads the "name" attribute
// (trimmed and interned). Throws SAXParseException when a subclass requires a name and none is given.
BaseTag(BaseTag parent, Attributes atts) throws SAXParseException {
this.parent = parent;
this.attributes = atts;
if (atts != null) {
// NOTE(review): the assignment target and the operand of the null test below are missing
// in this listing; presumably both are the "name" field — confirm against upstream.
String n = atts.getValue(NAME); = (n != null) ? intern(n.trim()) : null;
if ( == null && isNameRequired())
throw new SAXParseException("Missing required attribuite:" + NAME, getLocator());
final BaseTag getParent() {
return this.parent;
// Element name this tag answers to; the handler compares it with the closing element's local name.
abstract String getTagName();
// NOTE(review): the body of getName() is missing in this listing; presumably it returns the
// "name" field — confirm against upstream.
final String getName() {
Attributes getAttributes() {
return this.attributes;
* Pass through the SAX event for characters
void characters(char[] ch, int start, int length) {}
* Pass through the SAX event for starting the element
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {}
* Pass through the SAX event for ending the last element
void endElement() {}
* @return whether the name attribute is required for this tag
boolean isNameRequired() {
return false;
* hook to remember enclosing tags
void addTag(RefTag tag) {}
// Placeholder for an unexpected element. It logs the problem on construction and remembers the
// element name so the matching end tag can still be recognised; the inherited no-op hooks mean
// its content is ignored.
class BadTag extends BaseTag {
private final String tagName;
BadTag(BaseTag parent, String localName) throws SAXParseException {
// Attributes are passed as null, so no name lookup happens in BaseTag.
super(parent, null);
this.tagName = localName;
logger.log(getProblemLogLevel(), "###\tBad tag <" + localName + "> found, ignored: " + getLineNumberString());
String getTagName() {
return this.tagName;
* The root tag - <iniFile>. The root tag can only contain <section> or <import> tag
// Handler for the root element. It accepts <section> and <import> children; anything else
// becomes a BadTag.
private class RootTag extends BaseTag {
String getTagName() {
return ROOT;
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if (SECTION.equals(localName)) {
currentTag = new SectionTag(this, attributes);
} else if (IMPORT.equals(localName)) {
try {
currentTag = new ImportTag(this, attributes);
} catch (MalformedURLException e) {
// A malformed import URL is rethrown unchecked, keeping the original exception as the cause.
throw new IllegalArgumentException(e);
} else {
currentTag = new BadTag(this, localName);
// Handler for <import>. Constructing it immediately asks the parser to parse the referenced file.
private class ImportTag extends BaseTag {
String getTagName() {
return IMPORT;
ImportTag(RootTag parent, Attributes atts) throws SAXParseException, MalformedURLException {
super(parent, atts);
// Fire up the new parser and populate the inifile with the retrieved data.
// The import target is the value of the element's first attribute, resolved against baseDir.
// NOTE(review): the first argument to parseLabels is missing in this listing — confirm against upstream.
getParser().parseLabels(, new URL(baseDir, atts.getValue(0)));
* The section tag - <section>. The section tag contains either <param> or <img> tag
// Handler for <section>. A section requires a name, registers itself as the current section,
// accepts only <param> children and receives the label data produced by them.
private class SectionTag extends BaseTag {
String getTagName() {
return SECTION;
SectionTag(RootTag parent, Attributes atts) throws SAXParseException {
super(parent, atts);
// Label-hint debugging: remember which file this section came from.
if (LabelDebug.isLabelHintAllowed()) {
sectionToFileName.put(getName(), BASE_FILE);
currentSection = this;
// A section is public when its "public" attribute equals "true", ignoring case.
boolean isSectionPublic = "true".equalsIgnoreCase(atts.getValue(PUBLIC));
// NOTE(review): the statement executed for public sections is missing in this listing.
if (isSectionPublic) {;
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if (PARAM.equals(localName)) {
currentTag = new ParamTag(this, attributes);
} else {
currentTag = new BadTag(this, localName);
void endElement() {
currentSection = null;
boolean isNameRequired() {
return true;
// Stores one label of this section. When a value already existed, a "Redundant label" message
// is logged, except for languages whose locale string is CHINESE_TW or SPANISH_MX.
// NOTE(review): several expressions in this method are truncated in this listing (the store
// call and the getLanguage argument); presumably the store targets the "data" field — confirm
// against upstream.
void addLabelData(String name, Object data) {
Object existingValue =, name, data);
if (existingValue != null) { // TODO: Chinese is stored with a zh_TW being the default
HumanLanguage lang = LanguageProviderFactory.get().getProvider().getLanguage(;
// TODO SLT: fix this.
if (!ImmutableSet.of(LanguageConstants.CHINESE_TW, LanguageConstants.SPANISH_MX).contains(lang.getLocaleString())) {
logger.log(getProblemLogLevel(), "Redundant label found: " + name + " in section: " + getName()
+ ", Existing Value: " + existingValue);
} else {
// Java compiler doesn't optimize away a dead local store if the user of the variable is inside a dead-code optimized block., name, data);
* The param tag - <param name="param1">. The param tag takes three attributes
* name: parameter name. this is always required.
* alias: Use alias to make this param refers to the other label. e.g. <param name="val1"
* alias="sec1.val1"/>
* entity: specify default entity name. This is used if modifier tag has no entity attribute specified,
* and if there is no nouns defined.
// Handler for <param>. A param requires a name, registers itself as the current param while it
// is open, and hands its finished label data to the enclosing section under its own name.
private class ParamTag extends StringTag {
String getTagName() {
return PARAM;
ParamTag(SectionTag parent, Attributes atts) throws SAXParseException {
super(parent, atts);
currentParam = this;
boolean isNameRequired() {
return true;
void endElement() {
currentParam = null;
void addLabelDataToParent(Object data) {
// The parent is always a SectionTag; the constructor only accepts one.
((SectionTag)getParent()).addLabelData(getName(), data);
* Base class for tags that collect label text and embedded term tags, such as the param tag - <param name="param1">. Such a tag takes three attributes
* name: parameter name. this is always required.
* alias: Use alias to make this param refers to the other label. e.g. <param name="val1"
* alias="sec1.val1"/>
* entity: specify default entity name. This is used if modifier tag has no entity attribute specified,
* and if there is no nouns defined.
// Base class for tags whose content is label text mixed with grammatical term tags (nouns,
// adjectives, articles, plural/gender choices, counters). It also handles the "alias" attribute:
// an aliased tag registers the alias with the parser and ignores its own content.
// NOTE(review): many statements in this class are truncated or missing in this listing
// (anything that touched the text buffer, plus the bodies of endElement and addTag), so the
// comments below describe only what is still visible.
private abstract class StringTag extends BaseTag {
// True when a well-formed alias ("section.name") was given.
private boolean isAlias;
// Buffer for character data received between child elements.
private StringBuilder sb;
private final ArrayList values; // the type is specific (eg not List<>) to allow cloning
StringTag(BaseTag parent, Attributes atts) throws SAXParseException {
super(parent, atts);
this.values = new ArrayList<>();
this.isAlias = false;
// if alias is specified, ignore all remaining attributes/contents
String alias = atts.getValue(ALIAS);
if (alias != null) {
// An alias must contain a '.' that is not its first character: section before it, label name after it.
int i = alias.indexOf('.');
if (i <= 0)
logger.log(getProblemLogLevel(), "###\tBad alias name " + alias + " at " + parent.getName() + "." + getName() + " in " + getDictionary().getLanguage());
else {
this.isAlias = true;
getParser().addAlias(parent.getName(), getName(), alias.substring(0, i),
alias.substring(i + 1), getFile(), getLocator().getLineNumber());
if (!this.isAlias) {
// make sure we no longer have alias on it. This is for the case that english label
// has this param as an alias, but overriding file has not.
getParser().removeAlias(parent.getName(), getName());
// Converts the buffered text's \\uXXXX-style escapes and de-duplicates the resulting string via the parser.
// NOTE(review): the argument of convertEscapedToUnicode and whatever was done with "s"
// afterwards are missing in this listing.
private void addBufferedText() {
String s = convertEscapedToUnicode(;
// If we ever wanted to keep large labels on disk/out of memory,
// this would be great place to inject something. Perhaps storing
// in sleepycat or somesuch thing.
s = getParser().uniquefy(s);
// Dispatches a child element to the matching term-tag handler. The element name is compared
// in lower case; a name that is not entity/plural/gender/counter and is not a known adjective
// or article of the dictionary is treated as a noun tag.
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
// if this is alias specified, ignore any internal tag
if (this.isAlias)
// push buffered text into array
if ( != null && > 0) {
addBufferedText(); = null;
String lowerName = localName.toLowerCase();
// or
if (ENTITY.equals(lowerName)) {
currentTag = new NounTag(this, attributes, localName, true);
} else if (PLURAL.equals(lowerName)) {
currentTag = new PluralTag(this, attributes, localName);
} else if (GENDER.equals(lowerName)) {
currentTag = new GenderTag(this, attributes, localName);
} else if (getParser().getDictionary().isAdjective(lowerName)) {
currentTag = new AdjTag(this, attributes, localName);
} else if (getParser().getDictionary().isArticle(lowerName)) {
currentTag = new ArtTag(this, attributes, localName);
} else if (COUNTER.equals(lowerName)) {
currentTag = new CounterTag(this, attributes, localName);
} else {
currentTag = new NounTag(this, attributes, localName, false);
// Character data is collected only for non-alias tags; the buffer is created lazily.
void characters(char[] ch, int start, int length) {
if (!isAlias) {
if ( == null) = new StringBuilder();, start, length);
* Add the label data to the parent object
* @param data
abstract void addLabelDataToParent(Object data);
// Finishes the tag. For non-alias tags any remaining text is flushed and, when the parser
// tracks duplicates, the label is reported under "<file path>:<section>.<name>". A label with
// more than one value is post-processed by finishParsingLabelsNew.
// NOTE(review): the statements that hand the result to addLabelDataToParent are missing in this listing.
void endElement() {
if (this.isAlias) {
// register to name only - leave as null at this point
} else {
// push any remaining string
if ( != null && > 0) {
if (getParser().trackDupes()) {
getParser().trackLabel(, getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName());
if (values.isEmpty()) {
} else {
if (this.values.size() == 1) {
} else {
List rawData = finishParsingLabelsNew(this.values);
// Groups the term tags in "values" into noun phrases and then rewrites, in place, the
// articles, adjectives and choice tags of each phrase so they agree with the phrase's noun.
// Returns the same list that was passed in.
// NOTE(review): the statements that record positions on the current phrase (article, noun,
// adjective and choice locations) are missing from the first loop in this listing.
protected List finishParsingLabelsNew(List values) {
// The goal here is to determine noun phrases correctly. We do that by assuming that noun phrases are of the following form
// [article] [adjective]+ noun [adjective]+
// We need to:
// Fix up the articles based on the noun and the next term
// Fix up the adjective based on the noun, the next term, and the article (whether from an article tag or from the noun)
// Possibly fix up the noun based on the article tag for those languages that have a distinct article particle and a distinct form based on definitiveness
// So there's a two pass algorithm. In the first, we figure out the "noun" phrases
List phrases = new ArrayList<>();
NounPhrase curPhrase = new NounPhrase(values);
// Pass 1: walk the values and start a new phrase whenever a term shows that the previous
// phrase, which already has its noun, has ended.
for (int i = 0; i < values.size(); i++) {
Object o = values.get(i);
if (o instanceof ArticleRefTag) {
if (curPhrase.isNounSet()) {
curPhrase = new NounPhrase(values); // Demarcate as a new phrase
} else if (o instanceof NounRefTag) {
if (curPhrase.isNounSet()) {
curPhrase = new NounPhrase(values); // New noun phrase
phrases.add(curPhrase); // Once there's a noun, there's a noun phrase
} else if (o instanceof AdjectiveRefTag) {
AdjectiveRefTag refTag = (AdjectiveRefTag)o;
Adjective adj = getDictionary().getAdjective(refTag.getName());
LanguagePosition pos = refTag.getDeclensionOverrides().getPosition(); // Allow for position overrides in case it's "different"
if (pos == null) pos = adj.getPosition();
// A pre-positioned adjective after a noun belongs to the next phrase.
if (pos == LanguagePosition.PRE && curPhrase.isNounSet()) {
curPhrase = new NounPhrase(values); // Demarcate as a new phrase
if (adj.isCopiedFromDefault()) {
logger.log(getSevereProblemLogLevel(), "Adjective copied from english for " + getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName());
// add to the last noun phrase
} else if (o instanceof AdnominalRefTag) {
if (curPhrase.isNounSet()) {
curPhrase = new NounPhrase(values); // Demarcate as a new phrase
// After the walk: a trailing phrase without a noun holds dangling modifiers.
if (!curPhrase.isNounSet()) {
if (phrases.isEmpty()) {
logger.log(getSevereProblemLogLevel(), "Adjective used without a noun for " + getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName());
return values; // Don't deal
} else {
// Chances are we have a dangling adjective (like ( ))
if (phrases.size() > 1) {
// Log only if there's an actual issue (multiple nouns)
logger.log(getSevereProblemLogLevel(), "Prepositional Adjective used in post-position for " + getFile().getPath() + ":" + ((SectionTag)getParent()).getName() + "." + getName());
// Add the adjectives to the last noun phrase and keep going.
NounPhrase lastPhrase = phrases.get(phrases.size()-1);
for (Integer i : curPhrase.getAdjectiveLocs()) {
for (Integer i : curPhrase.getChoiceLocs()) {
// Pass 2: fix up each phrase. For "ends with" declensions the adjectives are processed from last to first.
boolean isEndsWithLanguage = getDictionary().getDeclension().hasEndsWith();
for (NounPhrase phrase : phrases) {
Object o = values.get(phrase.nounLoc);
assert o instanceof NounRefTag : "Invalid noun location " + phrase.nounLoc + " in " + phrase;
NounRefTag nounRef = phrase.getNounRefTag();
ArticleRefTag articleRef = phrase.getArticleRefTag();
// Handle the article
if (articleRef != null) {
// Fix up the article of the noun based on the particle if it's interesting.
if (getDictionary().getDeclension().shouldInferNounDefArticleFromParticle()) {
LanguageArticle articleType = ((Article)articleRef.resolveModifier(getDictionary())).getArticleType();
// Update the noun with the article
if (articleType == LanguageArticle.DEFINITE) {
nounRef = nounRef.makeArticled(getDictionary(), articleType);
values.set(phrase.getNounLoc(), nounRef);
// Figure out the "next thing"
// The article's "next term" is the adjective when that adjective precedes the noun, else the noun itself.
if (phrase.hasAdjectives()) {
int nextAdjective = phrase.getAdjectiveLoc(isEndsWithLanguage ? -1 : 0);
values.set(phrase.getArticleLoc(), articleRef.fixupModifier(nounRef, nextAdjective < phrase.getNounLoc() ? (TermRefTag) values.get(nextAdjective) : nounRef));
} else {
values.set(phrase.getArticleLoc(), articleRef.fixupModifier(nounRef, nounRef));
// Handle special case for bulgarian where all noun inflections should move to the first modifier automatically if there is a modifier
// Modify the noun to have no inflection, and move the modifiers back to the adjective ref tag.
if (getDictionary().getDeclension().moveNounInflectionToFirstModifier() && phrase.hasAdjectives()) {
NounRefTag nounTag = phrase.getNounRefTag();
NounForm nounForm = nounTag.getForm();
AdjectiveRefTag firstAdjectiveTag = (AdjectiveRefTag) values.get(phrase.getAdjectiveLoc(0));
// Only needed when the noun actually carries an article or a non-nominative case.
if (nounForm.getArticle() != LanguageArticle.ZERO || nounForm.getCase() != LanguageCase.NOMINATIVE) {
nounRef = nounTag.makeUninflected(getDictionary());
values.set(phrase.getNounLoc(), nounRef);
values.set(phrase.getAdjectiveLoc(0), firstAdjectiveTag.fixupModifier(nounRef, nounRef, nounForm));
// Now go through the adjectives.
// When the phrase has an article tag, its article type overrides the one used for the adjectives.
LanguageArticle articleOverride = articleRef != null ? getDictionary().getArticle(articleRef.getName()).getArticleType() : null;
if (isEndsWithLanguage) {
for (int i = phrase.getAdjectiveLocs().size()-1; i >= 0; i--) {
AdjectiveRefTag adjTag = (AdjectiveRefTag) values.get(phrase.getAdjectiveLoc(i));
// Figure out the "next" object
if (i > 0) {
int prevAdjective = phrase.getAdjectiveLoc(i-1);
values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, prevAdjective > phrase.getNounLoc() ? (TermRefTag) values.get(prevAdjective) : nounRef, articleOverride));
} else {
// TODO: Not sure this is right.
values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, nounRef, articleOverride));
} else {
for (int i = 0; i < phrase.getAdjectiveLocs().size(); i++) {
AdjectiveRefTag adjTag = (AdjectiveRefTag) values.get(phrase.getAdjectiveLoc(i));
// Figure out the "next" object
if (i < phrase.getAdjectiveLocs().size() - 1) {
int nextAdjective = phrase.getAdjectiveLoc(i+1);
values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, nextAdjective < phrase.getNounLoc() ? (TermRefTag) values.get(nextAdjective) : nounRef, articleOverride));
} else {
values.set(phrase.getAdjectiveLoc(i), adjTag.fixupModifier(nounRef, nounRef, articleOverride));
// Now do choice phrases
for (int i = 0; i < phrase.getChoiceLocs().size(); i++) {
AdnominalRefTag choiceTag = (AdnominalRefTag) values.get(phrase.getChoiceLoc(i));
values.set(phrase.getChoiceLoc(i), choiceTag.getWithResolvedNounTag(nounRef));
return values;
// Hook called by child term tags to hand over their finished reference tag.
// NOTE(review): the body is missing in this listing — confirm against upstream.
void addTag(RefTag tag) {
* Represents the locations of various elements of a noun phrase in a label.
* @author stamm
// Index bookkeeping for one noun phrase inside a label's value list: where its noun and article
// sit, plus the positions of its adjectives and choice tags. -1 means "not set".
// NOTE(review): the statements that append to adjectiveLocs and choiceLocs are missing from
// addAdjectiveLoc/addChoiceLoc in this listing.
static class NounPhrase {
// The label's full value list; the locations below index into it.
private final List values;
private int nounLoc = -1;
private int articleLoc = -1;
private List adjectiveLocs = new ArrayList<>();
private List choiceLocs = new ArrayList<>();
public NounPhrase(List values) {
this.values = values;
// Returns the noun tag at nounLoc, or null when no noun has been set.
public NounRefTag getNounRefTag() {
return nounLoc == -1 ? null : (NounRefTag) values.get(nounLoc);
// Returns the article tag at articleLoc, or null when no article has been set.
public ArticleRefTag getArticleRefTag() {
return articleLoc == -1 ? null : (ArticleRefTag) values.get(articleLoc);
public void setNounLoc(int i) {
this.nounLoc = i;
assert values.get(i) instanceof NounRefTag : "Illegal noun loc";
public void setArticleLoc(int i) {
this.articleLoc = i;
assert values.get(i) instanceof ArticleRefTag : "Illegal article loc";
public int getArticleLoc() { return this.articleLoc; }
public int getNounLoc() { return this.nounLoc; }
public void addAdjectiveLoc(int i) {
assert values.get(i) instanceof AdjectiveRefTag : "Illegal adjective loc";
public List getAdjectiveLocs() {
return this.adjectiveLocs;
// Returns the value-list index of the offset-th adjective; offset -1 selects the last one.
public int getAdjectiveLoc(int offset) {
if (offset == -1) { // Support -1 to get the "Last" adjective
return this.adjectiveLocs.get(this.adjectiveLocs.size()-1);
return this.adjectiveLocs.get(offset);
public void addChoiceLoc(int i) {
assert values.get(i) instanceof AdnominalRefTag : "Illegal choice loc";
public List getChoiceLocs() {
return this.choiceLocs;
// Returns the value-list index of the offset-th choice tag; offset -1 selects the last one.
public int getChoiceLoc(int offset) {
if (offset == -1) { // Support -1 to get the "Last" adjective
return this.choiceLocs.get(this.choiceLocs.size()-1);
return this.choiceLocs.get(offset);
public boolean isNounSet() {
return nounLoc != -1;
public boolean hasAdjectives() {
return !this.adjectiveLocs.isEmpty();
public boolean hasChoices() {
return !this.choiceLocs.isEmpty();
// First index of the phrase: the article if present, else the smaller of the first adjective and the noun.
public int getPhraseStart() {
if (articleLoc != -1) return articleLoc;
if (adjectiveLocs.isEmpty()) return nounLoc;
return Math.min(adjectiveLocs.get(0), nounLoc);
// Last index of the phrase: the larger of the last adjective and the noun. Choice locations are not considered.
public int getPhraseEnd() {
if (adjectiveLocs.isEmpty()) return nounLoc;
return Math.max(adjectiveLocs.get(adjectiveLocs.size()-1), nounLoc);
// Renders the slice of the value list spanned by this phrase.
public String toString() {
return values.subList(getPhraseStart(), getPhraseEnd() + 1).toString();
* Entity or any noun tag - e.g. <Accounts/>, <Account_Name/>, <Entity ref="0"/>
// Handler for a noun reference, written either as <Entity entity="..."/> or as an element named
// after the noun. The reference is resolved in the constructor; an unresolved noun is recorded
// as an UnknownEntity error and logged.
private class NounTag extends BaseTag {
// Resolved noun reference, or null when the noun could not be resolved.
private final NounRefTag nounTag;
private final String tagName;
String getTagName() {
return this.tagName;
NounTag(StringTag parent, Attributes atts, String localName, boolean isEntityTag) throws SAXParseException {
super(parent, atts);
this.tagName = localName;
// For an entity tag the noun name comes from the "entity" attribute and the element name is
// passed along separately; otherwise the element name itself is the noun name.
String entityName = isEntityTag ? atts.getValue(ENTITY) : localName;
String entityAttr = isEntityTag ? localName : null;
this.nounTag = constructNounTag(entityName, atts, entityAttr);
if (this.nounTag == null) {
ErrorInfo error = parser.addInvalidLabel(ErrorType.UnknownEntity, currentSection.getName(),
currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(),
GrammaticalLabelFileHandler.this.getLineNumber(), entityName);
logger.log(getProblemLogLevel(), "###\t" + error.getMessage());
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
// no inner tag is allowed
currentTag = new BadTag(this, localName);
// NOTE(review): the statement guarded by this null check is missing in this listing; presumably
// it hands nounTag to the parent via addTag — confirm against upstream.
void endElement() {
if (this.nounTag != null)
* Adjective tag. <New entity="Accounts"/>
// Handler for an adjective element (the element name is the adjective). It builds an
// AdjectiveRefTag, optionally tied to the noun named by the "entity" attribute.
class AdjTag extends BaseTag {
AdjectiveRefTag modTag;
private final String tagName;
String getTagName() {
return this.tagName;
AdjTag(StringTag parent, Attributes atts, String localName) throws SAXParseException {
super(parent, atts);
this.tagName = localName;
String lowerName = tagName.toLowerCase();
// handle case
String entityName = atts.getValue(ENTITY);
// The entity value is passed as the "entity element" only when it starts with a digit.
NounRefTag refTag = constructNounTag(entityName, atts, entityName == null
|| !Character.isDigit(entityName.charAt(0)) ? null : entityName);
TermAttributes attributes = new TermAttributes(getDictionary().getDeclension(), atts, false);
// Capitalisation of the output follows the first letter of the element name.
this.modTag = AdjectiveRefTag.getAdjectiveRefTag(lowerName, refTag, refTag,
Character.isUpperCase(tagName.charAt(0)), attributes);
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
// no inner tag is allowed
currentTag = new BadTag(this, localName);
// NOTE(review): the statement guarded by this null check is missing in this listing; presumably
// it hands modTag to the parent via addTag — confirm against upstream.
void endElement() {
if (this.modTag != null)
* Article tag. <The entity="Accounts"/>
// Handler for an article element (the element name is the article). It builds an ArticleRefTag,
// optionally tied to the noun named by the "entity" attribute.
class ArtTag extends BaseTag {
ArticleRefTag modTag;
private final String tagName;
String getTagName() {
return this.tagName;
ArtTag(StringTag parent, Attributes atts, String localName) throws SAXParseException {
super(parent, atts);
this.tagName = localName;
String lowerName = tagName.toLowerCase();
// handle case
String entityName = atts.getValue(ENTITY);
// The entity value is passed as the "entity element" only when it starts with a digit.
NounRefTag refTag = constructNounTag(entityName, atts, entityName == null
|| !Character.isDigit(entityName.charAt(0)) ? null : entityName);
TermAttributes attributes = new TermAttributes(getDictionary().getDeclension(), atts, false);
// Capitalisation of the output follows the first letter of the element name.
this.modTag = ArticleRefTag.getArticleRefTag(lowerName, refTag, refTag,
Character.isUpperCase(tagName.charAt(0)), attributes);
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
// no inner tag is allowed
currentTag = new BadTag(this, localName);
// NOTE(review): the statement guarded by this null check is missing in this listing; presumably
// it hands modTag to the parent via addTag — confirm against upstream.
void endElement() {
if (this.modTag != null)
* Counter tag. <Counter/>
// Handler for <counter>. It allows no children and, when closed, adds a CounterRefTag
// (constructed with a null argument) to its parent.
class CounterTag extends BaseTag {
String getTagName() {
return COUNTER;
CounterTag(StringTag parent, Attributes atts, String localName) throws SAXParseException {
super(parent, atts);
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
// no inner tag is allowed
currentTag = new BadTag(this, localName);
void endElement() {
getParent().addTag(new CounterRefTag(null));
* Represents a tag that can have "when" tags, or, when the when's are unspecified, a default value.
// Base class for tags that choose between alternatives keyed by an enum category (plural
// category, gender). Alternatives come from nested <when> tags; content outside any <when>
// becomes the default.
// NOTE(review): the generic type parameter declarations are garbled in this listing (see the
// stray ">" after the class name), and the body of addLabelDataToParent is cut short.
private abstract class ChoiceTag> extends StringTag {
// Alternatives keyed by category; created as an EnumMap over the enum class given to the constructor.
private Map perCategory;
private Object defaultVal; // Some choices have a default that is a specific categories, others are explicit based on the resolution of the noun.
ChoiceTag(StringTag parent, Attributes atts, String localName, Class type) throws SAXParseException {
super(parent, atts);
perCategory = new EnumMap<>(type);
defaultVal = null;
// <when> children (matched ignoring case) get their own handler; everything else is treated as ordinary StringTag content.
void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if (localName.equalsIgnoreCase(WHEN)) {
currentTag = new WhenTag(this, attributes, localName);
} else {
super.startElement(uri, localName, qName, attributes);
// Stores the alternative for one category. A second value for the same category replaces the
// first and is reported as a DuplicateWhen error.
void addLabelData(E val, Object data) {
if (val == null) return; // This means we were invalid
Object previous = perCategory.put(val, data);
if (previous != null) {
ErrorInfo error = parser.addInvalidLabel(ErrorType.DuplicateWhen, currentSection.getName(),
currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(),
GrammaticalLabelFileHandler.this.getLineNumber(), val);
logger.log(getProblemLogLevel(), "###\t" + error.getMessage());
* @return the default category to use for the unadorned tags, or when with isDefault="true"
* Can be null
abstract E getDefaultCategory();
// Supplies the function that maps a "val" label string to its category.
abstract Function getCategoryFromLabel();
// Records the default alternative. Without a default category it is kept in defaultVal;
// otherwise it is stored under the default category, unless that category already has a value,
// which is reported as a BadDefault error.
void setDefault(Object data) {
if (getDefaultCategory() == null) {
this.defaultVal = data;
} else {
if (perCategory.containsKey(getDefaultCategory())) {
ErrorInfo error = parser.addInvalidLabel(ErrorType.BadDefault, currentSection.getName(),
currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(),
GrammaticalLabelFileHandler.this.getLineNumber(), getDefaultCategory());
logger.log(getProblemLogLevel(), "###\t" + error.getMessage());
} else {
addLabelData(getDefaultCategory(), data);
// Returns the default alternative: the default category's entry when there is a default category, else defaultVal.
Object getDefault() {
return getDefaultCategory() != null ? getPerCategory().get(getDefaultCategory()) : this.defaultVal;
Map getPerCategory() {
return perCategory;
// Receives this tag's own (non-<when>) content; null, empty or whitespace-only strings are ignored.
void addLabelDataToParent(Object data) {
// If it's empty string or just whitespace, meh.
if (data == null || (data instanceof String && TextUtil.isNullEmptyOrWhitespace((String)data))) return;
* Plural tag with choices <plural num="0"> with embedded choices. The "non" when tags
// Handler for <plural num="N">. Its alternatives are keyed by PluralCategory, with OTHER as
// the default category. When closed it adds a PluralRefTag to its parent.
private class PluralTag extends ChoiceTag {
// Value of the "num" attribute, or 0 when it is not a valid integer.
private final int num;
String getTagName() {
return PLURAL;
PluralTag(StringTag parent, Attributes atts, String localName) throws SAXParseException {
super(parent, atts, localName, PluralCategory.class);
int _num;
try {
_num = Integer.parseInt(atts.getValue(NUM));
} catch (NumberFormatException ex) {
// Fall back to 0 and report the problem rather than failing the parse.
_num = 0;
ErrorInfo error = parser.addInvalidLabel(ErrorType.BadPluralReference, currentSection.getName(),
currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(),
GrammaticalLabelFileHandler.this.getLineNumber(), atts.getValue(NUM));
logger.log(getProblemLogLevel(), "###\t" + error.getMessage());
num = _num;
PluralCategory getDefaultCategory() {
return PluralCategory.OTHER;
Function getCategoryFromLabel() {
return a -> PluralCategory.fromLabel(a);
void endElement() {
getParent().addTag(new PluralRefTag(num, getPerCategory(), getDefault()));
* Gender tag with choices <gender> with embedded choices. The "non" when tags
// Handler for <gender>. Its alternatives are keyed by LanguageGender and it has no default
// category, so ChoiceTag keeps the default as a separate value. When closed it adds a
// GenderRefTag (constructed with a null first argument) to its parent.
private class GenderTag extends ChoiceTag {
String getTagName() {
return GENDER;
GenderTag(StringTag parent, Attributes atts, String localName) throws SAXParseException {
super(parent, atts, localName, LanguageGender.class);
LanguageGender getDefaultCategory() {
return null;
Function getCategoryFromLabel() {
return a -> LanguageGender.fromLabelValue(a);
void endElement() {
getParent().addTag(new GenderRefTag(null, getPerCategory(), getDefault()));
* when tag to specify choices <when val="0"> with embedded choices
// Handler for <when val="a,b" default="y"> inside a choice tag. The comma-separated "val" list
// names the categories this alternative applies to; "default" (compared with "y", ignoring
// case) marks the alternative as the default.
// NOTE(review): the generic type parameter and the expressions that map "val" strings to
// categories are garbled in this listing (see the dangling ": null;" and "-> a == null").
private class WhenTag> extends StringTag {
// Categories this alternative applies to; an empty list when the tag was invalid.
private final List categories;
String getTagName() {
return WHEN;
WhenTag(ChoiceTag parent, Attributes atts, String localName) throws SAXParseException {
super(parent, atts);
boolean isDefault;
List val = TextUtil.splitSimple(",", atts.getValue(VAL));
isDefault = YES.equalsIgnoreCase(atts.getValue(DEFAULT));
List newCategories = val != null
: null;
if (newCategories == null || -> a == null)) {
// A default <when> without categories stands for the parent's default category (which may be null).
if (isDefault && newCategories == null) {
newCategories = Collections.singletonList(parent.getDefaultCategory());
} else {
// Anything else is reported as a BadCategory error and contributes no categories.
ErrorInfo error = parser.addInvalidLabel(ErrorType.BadCategory, currentSection.getName(),
currentParam.getName(), GrammaticalLabelFileHandler.this.getFileURL(),
GrammaticalLabelFileHandler.this.getLineNumber(), val);
logger.log(getProblemLogLevel(), "###\t" + error.getMessage());
newCategories = Collections.emptyList();
this.categories = newCategories;
// Hands the finished alternative to the enclosing choice tag once per category.
// NOTE(review): the statement for the null-category ("Was default") branch is missing in this listing.
void addLabelDataToParent(Object data) {
ChoiceTag parent = (ChoiceTag)getParent();
for (E category : categories) {
if (category == null) {
// Was default
} else {
parent.addLabelData(category, data);
// Builds the NounRefTag for a noun reference, or returns null when the name is null or cannot
// be resolved.
//   entityName    - noun name to look up; may be null (modifier tags without an entity).
//   atts          - the element's attributes; they provide the term attributes, "escapeHtml" and "entity".
//   entityElement - when non-null, the reference is treated as a generic entity: capitalisation
//                   is taken from this string's first character and the name used is ENTITY.
// Resolution order, as visible below: custom entity, then a noun found by name, then a noun
// found by plural alias.
NounRefTag constructNounTag(String entityName, Attributes atts, String entityElement) {
boolean isCapital = false;
Integer ref = null;
TermAttributes ta = new TermAttributes(getDictionary().getDeclension(), atts);
String realEntityName = entityName;
// entityName could be null if this called for constructing ModTag
if (entityElement != null) {
isCapital = Character.isUpperCase(entityElement.charAt(0));
realEntityName = ENTITY;
} else if (entityName != null) {
isCapital = Character.isUpperCase(realEntityName.charAt(0));
// HTML escaping is requested with escapeHtml="true" or "y" (exact, case-sensitive match).
String escapeHtmlStr = atts.getValue(ESCAPE_HTML);
boolean escapeHtml = "true".equals(escapeHtmlStr) || "y".equals(escapeHtmlStr);
if (entityName != null) {
String lowerName = realEntityName.toLowerCase();
// Try and get the noun
Noun n = getParser().getDictionary().getNoun(lowerName, false);
// Get the form for the noun.
NounForm nid = ta.getExactNounForm();
if (nid == null) {
// Log a different error for articles vs non articles
if (ta.getArticle() != LanguageArticle.ZERO) {
logger.finest("###\tNoun form " + ta + " at " + currentSection.getName() + "."
+ currentParam.getName() + " uses antiquated article form. Stop it.");
} else {
logger.log(getProblemLogLevel(), "###\tNoun form " + ta + " at " + currentSection.getName() + "."
+ currentParam.getName() + " not defined for this type of language");
// No exact form exists, so fall back to the closest available one.
nid = ta.getApproximateNounForm();
// first test if this is for custom entities. They need to keep track of the reference id
if (getParser().getDictionary().isCustom(lowerName)) {
// takes entity="0", "1", ... in addition to entityName
String testRef = atts.getValue(ENTITY);
try {
ref = Integer.parseInt(testRef);
} catch (Exception ex) {
// Yeah, this should be NPE || NumberFormatException. This is simpler.
// A custom entity without a numeric "entity" attribute is logged and cannot be resolved.
if (ref == null) {
logger.log(getProblemLogLevel(), "###\tCustom entity <" + entityName + "> at " + currentSection.getName() + "."
+ currentParam.getName() + " must have entity attribute");
return null;
return NounRefTag.getNounTag(realEntityName, ref, isCapital, escapeHtml, nid);
// OK, we have a "real" noun for it.
if ( n != null) {
// this is %entity/%compoundNouns - check plural="y" in case someone set
return NounRefTag.getNounTag(realEntityName, null, isCapital, escapeHtml, nid);
// See if it's a lowercase alias.
n = getParser().getDictionary().getNounByPluralAlias(lowerName, false);
if ( n != null ) {
// Get the "correct" term attribute based on plural overrides
NounForm overrideForm = getDictionary().getDeclension().getExactNounForm(LanguageNumber.PLURAL, nid.getCase(), nid.getPossessive(), nid.getArticle());
if (overrideForm == null) {
// Look for legacy article forms...
if (ta.getArticle() != LanguageArticle.ZERO && getDictionary().getDeclension().hasArticle() &&
!getDictionary().getDeclension().hasArticleInNounForm()) {
logger.finest("###\tNoun form " + ta + " at " + currentSection.getName() + "."
+ currentParam.getName() + " uses antiquated article form. Stop it.");
overrideForm = getDictionary().getDeclension().getApproximateNounForm(LanguageNumber.PLURAL, nid.getCase(), nid.getPossessive(), nid.getArticle());
// The reference uses the noun's own name together with the plural form.
return NounRefTag.getNounTag(n.getName(), null, isCapital, escapeHtml, overrideForm);
return null;
* Convert a Unicode-escaped String (\\uXXXX) into Unicode.
* @throws IllegalArgumentException
* If there is a substring starting with \\uXXXX where the value of XXXX is an invalid one
* for unicode.
// Scans the string for backslash escapes and returns the converted text, built through a
// DeferredStringBuilder seeded with the original string.
// A \\uXXXX sequence is replaced by the character whose code is the four hex digits; a short or
// non-hex sequence raises IllegalArgumentException.
// NOTE(review): most branch bodies are missing in this listing (the pass-through append, the
// trailing-backslash handling, and the t/r/n/f cases with their breaks), so only the \\u
// handling can be described from what is visible.
public static String convertEscapedToUnicode(String str) {
int len = str.length();
DeferredStringBuilder buf = new DeferredStringBuilder(str);
for (int i = 0; i < len; ++i) {
char c = str.charAt(i);
if (c != '\\') {
} else {
// found an escape char '\\' at the end of the string
if (++i >= len) {
c = str.charAt(i); // look at next character
switch (c) {
case 't':
case 'r':
case 'n':
case 'f':
case 'u':
try {
++i; // need to look at XXXX portion of \\uXXXX escape sequence
buf.append((char)Integer.parseInt(str.substring(i, i + 4), 16));
// Skip the remaining three hex digits; the loop's own ++i consumes the fourth.
i += 3;
catch (NullPointerException | StringIndexOutOfBoundsException | NumberFormatException x) {
// i points at the first hex digit here, so (i - 2) is the index of the backslash.
throw new IllegalArgumentException("Malformed \\uxxxx encoding at position " + (i - 2)
+ " in " + str);
return buf.toString();
// -----------------------------------------------------------
// Label Debugger support
// ----------------------------------------------------------
// Path of the file being parsed; assigned in the constructor when label hints are allowed.
private String BASE_FILE = null;
// Section-name to file-name map obtained from the parser; SectionTag adds an entry per section
// when label hints are allowed.
private Map sectionToFileName;
// Returns the section-name to file-name map used by the label debugger (the live map, not a copy).
Map getSectionToFileName() {
return this.sectionToFileName;