org.modelcc.language.factory.LanguageSpecificationFactory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ModelCC Show documentation
Show all versions of ModelCC Show documentation
ModelCC is a model-based parser generator (a.k.a. compiler compiler) that decouples language specification from language processing, avoiding some of the problems caused by grammar-driven parser generators. ModelCC receives a conceptual model as input, along with constraints that annotate it. It is then able to create a parser for the desired textual language and the generated parser fully automates the instantiation of the language conceptual model. ModelCC also includes a built-in reference resolution mechanism that results in abstract syntax graphs, rather than mere abstract syntax trees.
The newest version!
/*
* ModelCC, distributed under ModelCC Shared Software License, www.modelcc.org
*/
package org.modelcc.language.factory;
import java.io.Serializable;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.modelcc.AssociativityType;
import org.modelcc.CompositionType;
import org.modelcc.Position;
import org.modelcc.SeparatorPolicy;
import org.modelcc.language.LanguageException;
import org.modelcc.language.LanguageSpecification;
import org.modelcc.language.lexis.*;
import org.modelcc.language.metamodel.*;
import org.modelcc.language.syntax.*;
import org.modelcc.lexer.recognizer.PatternRecognizer;
import org.modelcc.metamodel.Evaluator;
/**
* Language Specification Factory.
*
* @author Luis Quesada ([email protected]), refactored by Fernando Berzal ([email protected])
*/
public class LanguageSpecificationFactory implements Serializable
{
// Specification factory
// Builders that accumulate the lexical, syntactic and member-level parts of the specification.
private LexicalSpecificationFactory lsf = new LexicalSpecificationFactory();
private SyntaxSpecificationFactory ssf = new SyntaxSpecificationFactory();
private MemberSpecificationFactory msf = new MemberSpecificationFactory();
// Mappings
// NOTE(review): the generic type arguments of these declarations appear to have been
// lost (see the unbalanced '>' on symbolRules) -- restore them from the original sources.
// Language element -> its (non-reference) symbol identifier; filled in createMaps.
private Map identifiers = new HashMap();
// Language element -> rule symbol built from the non-reference identifier; filled in createMaps.
private Map symbols = new HashMap();
// Language element -> rule symbol built from the reference identifier; filled in createMaps.
private Map references = new HashMap();
// Language element -> set of rules generated for it; entries are created lazily by getRules.
private Map> symbolRules = new HashMap>();
// Simple language element -> token specification created for it in createLexicalSpecification.
private Map symbolTokens = new HashMap();
// Delimiter bookkeeping; presumably maintained by createDelimiters/addDelimiters -- TODO confirm.
private Map delimiters = new HashMap();
private Map delimiterTokens = new HashMap();
// List identifier -> rule symbol of the list non-terminal already generated for it.
private Map lists = new HashMap();
// Rules
// Rules generated for list non-terminals (see the listElement* methods).
private Set rules = new HashSet();
/**
 * Builds the language specification that corresponds to a language model.
 *
 * <p>The element maps are populated first; the lexical specification is
 * then created, followed by the syntax specification.
 *
 * @param m the language model
 * @return the language specification made of the lexical and syntactic parts
 * @throws LanguageException propagated from the specification-building steps
 */
public LanguageSpecification create(LanguageModel m) throws LanguageException
{
    createMaps(m);

    final LexicalSpecification lexicalPart = createLexicalSpecification(m);
    final SyntaxSpecification syntacticPart = createSyntaxSpecification(m);

    return new LanguageSpecification(lexicalPart, syntacticPart);
}
// Element maps
/**
 * Registers, for every element of the model, its symbol identifier, its
 * plain rule symbol and its reference rule symbol.
 *
 * @param m the language model whose elements are registered
 */
private void createMaps(LanguageModel m)
{
    for (LanguageElement element : m.getElements()) {
        // Plain (non-reference) identifier and symbol.
        final SymbolIdentifier plainId =
            new SymbolIdentifier(SymbolType.ELEMENT, element, null, false);
        identifiers.put(element, plainId);
        symbols.put(element, new RuleSymbol(plainId));

        // Reference counterpart: same element, reference flag set.
        final SymbolIdentifier referenceId =
            new SymbolIdentifier(SymbolType.ELEMENT, element, null, true);
        references.put(element, new RuleSymbol(referenceId));
    }
}
/**
 * Returns the rule set associated with an element, creating and
 * registering an empty one the first time the element is seen.
 *
 * @param el the language element
 * @return the (never null) rule set stored for the element
 */
private Set getRules(LanguageElement el)
{
    Set elementRules = symbolRules.get(el);
    if (elementRules != null) {
        return elementRules;
    }

    elementRules = new HashSet();
    symbolRules.put(el, elementRules);
    return elementRules;
}
// Lexical information
// -------------------
/**
 * Creates the lexical specification of the language.
 *
 * <p>For every simple (token-like) element, a rule
 * {@code element -> prefix basicToken suffix} is registered together with a
 * token specification for the basic symbol. Elements whose pattern matches
 * the empty string additionally get a rule without the basic token.
 *
 * @param m the language model
 * @return the lexical specification produced by the lexical factory
 * @throws LanguageException declared by this method; not thrown directly here
 */
private LexicalSpecification createLexicalSpecification (LanguageModel m)
    throws LanguageException
{
    createDelimiters(m);

    // Language tokens
    TokenBuilder builder = new TokenBuilder(m);

    for (LanguageElement element : m.getElements()) {
        // Only simple elements produce tokens.
        if (!SimpleLanguageElement.class.isAssignableFrom(element.getClass())) {
            continue;
        }

        SymbolIdentifier elementId = identifiers.get(element);
        SymbolIdentifier basicId =
            new SymbolIdentifier(SymbolType.BASIC, element, element.getSeparator(), false);

        // element -> prefix basic suffix
        Rule tokenRule = new Rule(new RuleSymbol(elementId));
        addDelimiters(tokenRule, element.getPrefix());
        tokenRule.add(new RuleSymbol(basicId));
        addDelimiters(tokenRule, element.getSuffix());

        Set elementRules = getRules(element);
        ssf.addRule(tokenRule);
        elementRules.add(tokenRule);

        SimpleLanguageElement simple = (SimpleLanguageElement) element;

        // XXX Hack: pattern matches empty string
        // element -> prefix suffix (the basic token is left out)
        if (simple.matchesEmptyString()) {
            RuleSymbol emptyLeft = new RuleSymbol(elementId);
            Rule emptyRule = new Rule(emptyLeft);
            addDelimiters(emptyRule, element.getPrefix());
            addDelimiters(emptyRule, element.getSuffix());
            ssf.addRule(emptyRule);
            elementRules.add(emptyRule);
        }

        TokenSpecification tokenSpec =
            new TokenSpecification(basicId, simple.getPatternRecognizer(), builder);
        lsf.addTokenSpecification(tokenSpec);
        symbolTokens.put(simple, tokenSpec);
    }

    return lsf.create(m);
}
// NOTE(review): this span looks truncated by text extraction. The loop header below
// runs straight into "members = msf.create(ce);", so the body of addDelimiters and the
// beginning of the syntax-specification method appear to be missing. Restore the
// missing text from the original sources before changing any code here.
private void addDelimiters (Rule r, List patterns)
{
// A null pattern list is skipped.
if (patterns!=null)
for (int i=0; i members = msf.create(ce);
// Rule set registered for this element (created on first use by getRules).
Set sr = getRules(element);
// One group of composite rules per member specification.
for (MemberSpecification spec: members)
sr.addAll(createCompositeRule(m,ssf,symbol,spec,element));
// Every rule collected for the element is registered in the syntax factory.
for (Rule r: sr)
ssf.addRule(r);
createPrecedences(ce, sr);
// Elements with key members also get reference rules, built on the reference symbol.
if (!ce.getKeyMembers().isEmpty()) {
symbol = references.get(element);
for (List act: reference(ce)) {
ssf.addRule(createReferenceRule(m,symbol,act));
}
}
}
}
// Finally register the rules accumulated in the 'rules' field (list rules).
for (Rule rule: rules) {
ssf.addRule(rule);
}
}
// Precedences
/**
 * Registers precedences between the rules generated for one composite
 * element, according to the element's composition type.
 *
 * <p>Every ordered pair of distinct rules {@code (r1, r2)} is visited:
 * <ul>
 *   <li>{@code EAGER}: when the right-hand side of {@code r1} contains every
 *       symbol of {@code r2} but not the other way round, a start precedence
 *       and a composition precedence are registered as {@code (r1, r2)}.</li>
 *   <li>{@code LAZY}: same condition, registered as {@code (r2, r1)}.</li>
 *   <li>{@code EXPLICIT}: composition precedences are registered in both
 *       directions. NOTE(review): mutual precedence looks intentional but
 *       its meaning depends on the syntax factory -- confirm.</li>
 *   <li>{@code UNDEFINED}: nothing is registered.</li>
 * </ul>
 *
 * <p>The rule set is typed as {@code Set<Rule>}: a raw {@code Set} cannot be
 * iterated as {@code Rule} in an enhanced for statement.
 *
 * @param ce the composite element that owns the rules
 * @param sr the rules generated for that element
 */
private void createPrecedences(CompositeLanguageElement ce, Set<Rule> sr)
{
    CompositionType ctyp = ce.getComposition();

    for (Rule r1 : sr) {
        for (Rule r2 : sr) {
            if (r1 == r2) {
                continue;
            }

            // The containment test is only evaluated by the branches that use it.
            switch (ctyp) {
                case EAGER:
                    if (superset(r1,r2) && !superset(r2,r1)) {
                        ssf.addStartPrecedence(r1,r2);
                        ssf.addCompositionPrecedence(r1,r2);
                    }
                    break;
                case LAZY:
                    if (superset(r1,r2) && !superset(r2,r1)) {
                        ssf.addStartPrecedence(r2,r1);
                        ssf.addCompositionPrecedence(r2,r1);
                    }
                    break;
                case EXPLICIT:
                    ssf.addCompositionPrecedence(r2,r1);
                    ssf.addCompositionPrecedence(r1,r2);
                    break;
                case UNDEFINED:
                default:
                    break;
            }
        }
    }
}
/**
 * Tells whether the right-hand side of {@code r1} contains every symbol of
 * the right-hand side of {@code r2}.
 *
 * @param r1 the candidate superset rule
 * @param r2 the candidate subset rule
 * @return true if no symbol of {@code r2} is missing from {@code r1}
 */
private boolean superset(Rule r1, Rule r2)
{
    boolean allContained = true;

    for (RuleSymbol candidate : r2.getRight()) {
        if (!r1.getRight().contains(candidate)) {
            allContained = false;
            break;
        }
    }

    return allContained;
}
// Selector elements.
/**
 * Creates one selection rule {@code parent -> prefix child suffix} for every
 * (element, sub-element) pair of the model. Each rule is registered in the
 * syntax factory and in the parent's rule set.
 *
 * @param m the language model
 */
private void createSelectionElements (LanguageModel m)
{
    for (LanguageElement parent : m.getElements()) {
        SymbolIdentifier parentId = identifiers.get(parent);

        // Elements without sub-elements produce no selection rules.
        if (m.getSubelements().get(parent) == null) {
            continue;
        }

        for (LanguageElement child : m.getSubelements().get(parent)) {
            SymbolIdentifier childId = identifiers.get(child);

            Rule selection = new Rule(new RuleSymbol(parentId));
            selection.setType(Rule.Type.SELECTION);
            // The delimiters come from the parent element, not from the child.
            addDelimiters(selection, parent.getPrefix());
            selection.add(new RuleSymbol(childId));
            addDelimiters(selection, parent.getSuffix());

            ssf.addRule(selection);
            getRules(parent).add(selection);
        }
    }
}
// Associativities
/**
 * Copies the associativity of every element that defines one (anything but
 * {@code UNDEFINED}) into the syntax factory, keyed by the element's symbol
 * identifier.
 *
 * @param m the language model
 */
private void createAssociativities (LanguageModel m)
{
    for (LanguageElement element : m.getElements()) {
        AssociativityType associativity = element.getAssociativity();
        if (associativity == AssociativityType.UNDEFINED) {
            continue;
        }

        SymbolIdentifier elementId = identifiers.get(element);
        ssf.setAssociativity(elementId, associativity);
    }
}
// Precedences
/**
 * Registers the precedences declared in the model between elements.
 *
 * <p>For every element {@code el} that is composite or not simple, and every
 * element {@code el2} listed in the model's precedences for {@code el}, each
 * pair of rules {@code (r1, r2)} taken from the two elements' rule sets gets:
 * <ul>
 *   <li>a composition precedence when both elements are composite;</li>
 *   <li>a selection precedence when neither element is simple.</li>
 * </ul>
 * Elements without a rule set or without declared precedences are skipped.
 *
 * <p>The rule sets are typed as {@code Set<Rule>} (a raw {@code Set} cannot be
 * iterated as {@code Rule}), and the class tests, which do not depend on the
 * rules, are evaluated once per element pair rather than once per rule pair.
 *
 * @param m the language model
 */
private void createPrecedences (LanguageModel m)
{
    for (LanguageElement el : m.getElements()) {
        boolean elComposite = CompositeLanguageElement.class.isAssignableFrom(el.getClass());
        boolean elNotSimple = !SimpleLanguageElement.class.isAssignableFrom(el.getClass());
        if (!elComposite && !elNotSimple) {
            continue;
        }

        Set<Rule> sr = symbolRules.get(el);
        if (sr == null || m.getPrecedences().get(el) == null) {
            continue;
        }

        for (LanguageElement el2 : m.getPrecedences().get(el)) {
            Set<Rule> sr2 = symbolRules.get(el2);
            if (sr2 == null) {
                continue;
            }

            // Loop-invariant with respect to the rule pairs below.
            boolean composition = elComposite
                && CompositeLanguageElement.class.isAssignableFrom(el2.getClass());
            boolean selection = elNotSimple
                && !SimpleLanguageElement.class.isAssignableFrom(el2.getClass());
            if (!composition && !selection) {
                continue;
            }

            for (Rule r1 : sr) {
                for (Rule r2 : sr2) {
                    if (composition) {
                        ssf.addCompositionPrecedence(r1, r2);
                    }
                    if (selection) {
                        ssf.addSelectionPrecedence(r1, r2);
                    }
                }
            }
        }
    }
}
// Rules
// -----
// Builds the composition rules for one member specification of an element.
// NOTE(review): this method is truncated by text extraction in two places (the loop
// header "for (int i=0; i0) {" and the line "if (i>();"); the code between them,
// including the branch taken when no member needs expanding, is missing from this
// copy. Restore it from the original sources before changing any code here.
private Set createCompositeRule ( LanguageModel m,
SyntaxSpecificationFactory ssf,
RuleSymbol left,
MemberSpecification mn,
LanguageElement el)
{
List elcs = mn.getMembers();
Set ret = new HashSet();
// f: index of the member selected for expansion (-1 while none has been found).
int f = -1;
boolean found = false;
// err: set when a second, different member also qualifies.
boolean err = false;
if (elcs.size() > 2) {
for (int i=0; i0) {
// A member qualifies when its element has sub-elements with precedences among them.
if (hasSubAsoc(m,m.getClassToElement().get(elcs.get(i-1).getElementClass()))) {
if (!found) found = true;
else if (i-1 != f) err = true;
f = i-1;
}
}
if (i>();
LanguageMember rep;
// rules: generated rule -> sub-element it was generated for.
Map rules = new HashMap();
// precedes: generated rule -> elements preceded by that rule's sub-element.
Map> precedes = new HashMap>();
LanguageElement e = m.getClassToElement().get(elcs.get(f).getElementClass());
// One rule per sub-element of the selected member's element: the member at index f
// is replaced by a copy whose content class is the sub-element's class.
for (LanguageElement sc: m.getSubelements().get(e)) {
ArrayList elcc = new ArrayList();
elcc.addAll(elcs);
rep = elcc.get(f);
elcc.remove(f);
String field = rep.getID();
boolean optional = rep.isOptional();
List prefix = rep.getPrefix();
List suffix = rep.getSuffix();
List separator = rep.getSeparator();
Class contentClass = sc.getElementClass();
boolean id = rep.isKey();
boolean reference = rep.isReference();
Evaluator evaluator = rep.getEvaluator();
LanguageMember ctx;
// Collections keep their collection type and multiplicity bounds in the copy.
if (rep.getClass().equals(MemberCollection.class)) {
MemberCollectionType collection = ((MemberCollection)rep).getCollection();
int minimumMultiplicity = ((MemberCollection)rep).getMinimumMultiplicity();
int maximumMultiplicity = ((MemberCollection)rep).getMaximumMultiplicity();
ctx = new MemberCollection(field,contentClass,optional,id,reference,prefix,suffix,separator,collection,minimumMultiplicity,maximumMultiplicity,evaluator);
} else {
ctx = new LanguageMember(field,contentClass,optional,id,reference,prefix,suffix,separator,evaluator);
}
elcc.add(f,ctx);
Rule r = createCompositionRule(m,left,elcc,mn,el);
ret.add(r);
rules.put(r,m.getClassToElement().get(sc.getElementClass()));
LanguageElement elrep = m.getClassToElement().get(sc.getElementClass());
if (m.getPrecedences().get(elrep) != null) {
precedes.put(r,m.getPrecedences().get(elrep));
}
}
// Carry the sub-element precedences over to the generated rules: (r1, r2) is
// registered when r2's sub-element is in the precedence set recorded for r1.
for (Rule r1: rules.keySet()) {
if (precedes.get(r1) != null) {
for (Rule r2: rules.keySet()) {
if (precedes.get(r1).contains(rules.get(r2))) {
ssf.addCompositionPrecedence(r1, r2);
}
}
}
}
}
return ret;
}
/**
 * Tells whether some sub-element of {@code el} appears in the precedence
 * set of one of {@code el}'s sub-elements.
 *
 * @param m  the language model
 * @param el the element whose sub-elements are inspected
 * @return true if such a sub-element exists; false otherwise, including
 *         when the element has no sub-elements
 */
private boolean hasSubAsoc(LanguageModel m,LanguageElement el)
{
    if (m.getSubelements().get(el) == null) {
        return false;
    }

    // Union of everything preceded by any sub-element.
    Set precededElements = new HashSet();
    for (LanguageElement sub : m.getSubelements().get(el)) {
        if (m.getPrecedences().get(sub) != null) {
            precededElements.addAll(m.getPrecedences().get(sub));
        }
    }

    // Is any sub-element itself in that union?
    for (LanguageElement sub : m.getSubelements().get(el)) {
        if (precededElements.contains(sub)) {
            return true;
        }
    }
    return false;
}
/**
 * Creates a rule through {@code createRule} and marks it as a composition rule.
 *
 * @param model   the language model
 * @param symbol  the left-hand side symbol
 * @param cts     the members that make up the right-hand side
 * @param member  the member specification the rule belongs to
 * @param element the element the rule is generated for
 * @return the rule, typed as {@code COMPOSITION}
 */
private Rule createCompositionRule
    ( LanguageModel model,
      RuleSymbol symbol,
      List cts,
      MemberSpecification member,
      LanguageElement element )
{
    final Rule composition = createRule(model, symbol, cts, member, element);
    composition.setType(Rule.Type.COMPOSITION);
    return composition;
}
/**
 * Creates a rule through {@code createRule}, with neither member
 * specification nor element, and marks it as a reference rule.
 *
 * @param model  the language model
 * @param symbol the left-hand side (reference) symbol
 * @param cts    the members that make up the right-hand side
 * @return the rule, typed as {@code REFERENCE}
 */
private Rule createReferenceRule
    ( LanguageModel model,
      RuleSymbol symbol,
      List cts )
{
    final Rule reference = createRule(model, symbol, cts, null, null);
    reference.setType(Rule.Type.REFERENCE);
    return reference;
}
// NOTE(review): this span is truncated by text extraction. The loop header below runs
// straight into the signature of reference(...), so the rest of createRule is missing
// from this copy. Restore it from the original sources before changing any code here.
private Rule createRule ( LanguageModel m,
RuleSymbol left,
List cts,
MemberSpecification member,
LanguageElement el )
{
Rule r = new Rule(left);
// el is null for reference rules (see createReferenceRule): no element prefix then.
if (el != null)
addDelimiters(r, el.getPrefix());
for (int i=0; i> reference(CompositeLanguageElement ce)
{
// Free-order elements go through referenceFreeOrder; the others keep the declared
// order of their key members. Both start from an empty accumulator.
if (ce.isFreeOrder())
return referenceFreeOrder(ce.getKeyMembers(),new ArrayList());
else
return referenceMembers(ce.getKeyMembers(),new ArrayList());
}
// Recursively moves one member at a time from elcs to the accumulator act; once elcs
// is empty, the accumulated ordering is expanded by referenceMembers.
// NOTE(review): the loop header below is truncated by text extraction (the text between
// "i" and "();" is missing); restore it from the original sources before changing code.
private Set> referenceFreeOrder(List elcs,List act)
{
Set> ret = new HashSet>();
List copy,actcopy;
if (elcs.size() > 0) {
for (int i=0; i();
copy.addAll(elcs);
// Fresh accumulator for this branch: act followed by the i-th remaining member.
actcopy = new ArrayList();
actcopy.addAll(act);
actcopy.add(elcs.get(i));
// The chosen member is no longer available to the recursive call.
copy.remove(i);
ret.addAll(referenceFreeOrder(copy,actcopy));
}
} else {
// Every member has been placed: expand the ordering held in act.
ret.addAll(referenceMembers(act,new ArrayList()));
}
return ret;
}
// Enumerates the member sequences obtained from elcs (in order) by keeping or dropping
// each optional member; mandatory members are always kept.
// act holds the members chosen so far. Note that act is modified in place and may
// itself be stored in the returned set.
// NOTE(review): generic type arguments appear to have been lost in this copy
// (see "Set>"); restore them from the original sources.
private Set> referenceMembers(List elcs,List act)
{
Set> ret = new HashSet>();
ArrayList copy;
// act2 is a snapshot of act taken before anything is appended in this call; it
// becomes the "optional member left out" branch.
ArrayList act2 = new ArrayList();
act2.addAll(act);
if (elcs.size() > 0) {
// Work on a copy so that the caller's elcs is not modified.
copy = new ArrayList();
copy.addAll(elcs);
while (!copy.isEmpty()) {
if (copy.get(0).isOptional()) {
// First optional member: branch into "with it" (act) and "without it" (act2)
// over the remaining members, then return the union of both.
act.add(copy.get(0));
copy.remove(0);
ret.addAll(referenceMembers(copy,act));
ret.addAll(referenceMembers(copy,act2));
return ret;
} else {
// Mandatory member: it belongs to both branches.
act.add(copy.get(0));
act2.add(copy.get(0));
copy.remove(0);
}
}
// Reached only when no optional member was found in elcs.
if (!act.isEmpty())
ret.add(act);
} else {
// No members left: the accumulated sequence is the only result, even if empty.
ret.add(act);
}
return ret;
}
// List elements
// -------------
// Symbol table used for the extra (decorator) content of a list: set by listElement to
// 'references' or 'symbols' (or null when there is no extra content) and read by
// the ListDecorator constructor.
Map chosenSymbols;
// Symbol table used for the list's own elements: set by listElement to 'references'
// or 'symbols' and read by the rule* helpers.
Map chosen;
/**
 * Returns the rule symbol that stands for a list member, generating the
 * list rules as needed.
 *
 * <p>Sets the {@code chosenSymbols} and {@code chosen} fields, then
 * dispatches on the position of the extra content: {@code -1} (or no extra
 * content) selects the default list, {@code Position.BEFORELAST} the
 * before-last variant, and anything else the within variant.
 *
 * @param m   the language model
 * @param ct  the list member
 * @param cm  the extra content attached to the list, or null
 * @param ref whether the list elements are references
 * @return the rule symbol to use for the member
 */
private RuleSymbol listElement(LanguageModel m,LanguageMember ct,MemberContent cm,boolean ref)
{
    LanguageElement el = m.getClassToElement().get(ct.getElementClass());

    // The member's separator wins; the element's separator is the fallback.
    List separator = ct.getSeparator();
    if (separator == null) {
        separator = el.getSeparator();
    }

    int extraPos = -1;
    if (cm == null) {
        chosenSymbols = null;
    } else {
        chosenSymbols = cm.getContent().isReference() ? references : symbols;
        extraPos = cm.getPosition();
    }

    chosen = ref ? references : symbols;

    if (extraPos == -1) {
        return listElementDefault(ct, ref, separator, m, el, cm);
    }
    if (extraPos == Position.BEFORELAST) {
        return listElementBeforeLast(ct, ref, separator, m, el, cm);
    }
    // Remaining case: Position.WITHIN
    return listElementWithin(ct, ref, separator, m, el, cm);
}
// Delimiter metadata
/**
 * Describes the extra content attached to a list: its element, position,
 * separator policy, delimiters and rule symbol. Without extra content every
 * reference field is null and the position is -1.
 */
class ListDecorator
{
    LanguageElement element;
    int position;
    SeparatorPolicy separatorPolicy;
    RuleSymbol symbol;
    List prefix;
    List suffix;

    /**
     * @param m  the language model, used to resolve the content's element
     * @param cm the extra content, or null when the list has none
     */
    public ListDecorator (LanguageModel m, MemberContent cm)
    {
        if (cm == null) {
            // No extra content: reference fields keep their default null value.
            position = -1;
            return;
        }

        LanguageMember content = cm.getContent();

        element = m.getClassToElement().get(content.getElementClass());
        position = cm.getPosition();
        separatorPolicy = cm.getSeparatorPolicy();
        prefix = content.getPrefix();
        suffix = content.getSuffix();
        // The symbol table is the one selected by listElement for the extra content.
        symbol = new RuleSymbol(chosenSymbols.get(element).getType(),content);
    }
}
// List generated when the extra-content position is -1:
//   L  -> E
//   L  -> E L
//   L0 -> L
//   L0 -> epsilon
// L and L0 are created once per list identifier and cached in 'lists'.
// L0 is only created, and returned, when the minimum multiplicity is 0.
private RuleSymbol listElementDefault(LanguageMember ct, boolean ref,
        List separator, LanguageModel m, LanguageElement el, MemberContent cm)
{
    ListDecorator extra = new ListDecorator(m,cm);
    ListIdentifier listKey = new ListIdentifier(el,separator,ref,false,extra.element,extra.position,extra.separatorPolicy,'1');
    ListIdentifier optionalKey = new ListIdentifier(el,separator,ref,true,extra.element,extra.position,extra.separatorPolicy,'0');
    RuleSymbol list = lists.get(listKey);
    RuleSymbol optionalList = lists.get(optionalKey);

    if (list == null) {
        SymbolIdentifier listId = new SymbolIdentifier(SymbolType.LIST,el,separator,ref);
        list = new RuleSymbol(listId);
        lists.put(listKey,list);
        rules.add( ruleLE(el, list) );              // L -> E
        rules.add( ruleLEL(separator, el, list) );  // L -> E L
    }

    boolean mayBeEmpty = ((MemberCollection)ct).getMinimumMultiplicity()==0;
    if (!mayBeEmpty) {
        return new RuleSymbol(list.getType(),ct);
    }

    if (optionalList == null) {
        SymbolIdentifier optionalId = new SymbolIdentifier(SymbolType.LIST_ZERO,el,separator,ref);
        optionalList = new RuleSymbol(optionalId);
        lists.put(optionalKey,optionalList);
        rules.add(ruleL0L(list, optionalList));     // L0 -> L
        rules.add(ruleL0epsilon(optionalList));     // L0 -> epsilon
    }
    return new RuleSymbol(optionalList.getType(),ct);
}
// List with extra content before its last element:
//   L    -> E Lsep
//   L    -> (sepPolicy:extra) E
//   Lsep -> sep E Lsep
//   Lsep -> (sepPolicy:extra) E
// Both non-terminals are created once per list identifier and cached in 'lists'.
private RuleSymbol listElementBeforeLast(LanguageMember ct, boolean ref,
        List separator, LanguageModel m, LanguageElement el, MemberContent cm)
{
    ListDecorator extra = new ListDecorator(m,cm);
    ListIdentifier tailKey = new ListIdentifier(el,separator,ref,false,extra.element,extra.position,extra.separatorPolicy,'s');
    ListIdentifier listKey = new ListIdentifier(el,separator,ref,false,extra.element,extra.position,extra.separatorPolicy,'b');
    RuleSymbol tail = lists.get(tailKey);
    RuleSymbol list = lists.get(listKey);

    if (tail == null) {
        SymbolIdentifier tailId = new SymbolIdentifier(SymbolType.LIST_SEP,el,separator,ref);
        tail = new RuleSymbol(tailId);
        lists.put(tailKey,tail);
        rules.add(ruleLSsepELS(separator, el, tail));        // Lsep -> sep E Lsep
        rules.add(ruleLSsepE(separator, el, extra, tail));   // Lsep -> (sepPolicy:extra) E
    }

    if (list == null) {
        SymbolIdentifier listId = new SymbolIdentifier(SymbolType.LIST_BEFORE_LAST,el,separator,ref);
        list = new RuleSymbol(listId);
        lists.put(listKey,list);
        rules.add(ruleLEsep(el, tail, list));                // L -> E Lsep
        rules.add(ruleLsepE(separator, el, extra, list));    // L -> (sepPolicy:extra) E
    }

    return new RuleSymbol(list.getType(),ct);
}
// List with extra content somewhere within it:
//   L  -> E
//   L  -> E L
//   Lw -> L (sepPolicy:extra) L
//   Lw -> (sepPolicy:extra) L
//   Lw -> L (sepPolicy:extra)
//   Lw -> (sepPolicy:extra)
// Both non-terminals are created once per list identifier and cached in 'lists'.
private RuleSymbol listElementWithin(LanguageMember ct, boolean ref,
        List separator, LanguageModel m, LanguageElement el, MemberContent cm)
{
    ListDecorator extra = new ListDecorator(m,cm);
    ListIdentifier innerKey = new ListIdentifier(el,separator,ref,false,extra.element,extra.position,extra.separatorPolicy,'a');
    ListIdentifier outerKey = new ListIdentifier(el,separator,ref,false,extra.element,extra.position,extra.separatorPolicy,'w');
    RuleSymbol inner = lists.get(innerKey);
    RuleSymbol outer = lists.get(outerKey);

    if (inner == null) {
        SymbolIdentifier innerId = new SymbolIdentifier(SymbolType.LIST_ELEMENT,el,separator,ref);
        inner = new RuleSymbol(innerId);
        lists.put(innerKey,inner);
        rules.add(ruleLE(el, inner));                           // L -> E
        rules.add(ruleLEL(separator, el, inner));               // L -> E L
    }

    if (outer == null) {
        SymbolIdentifier outerId = new SymbolIdentifier(SymbolType.LIST_WITHIN,el,separator,ref);
        outer = new RuleSymbol(outerId);
        lists.put(outerKey,outer);
        rules.add(ruleLWLsepL(separator, extra, inner, outer)); // Lw -> L (sepPolicy:extra) L
        rules.add(ruleLWsepL(separator, extra, inner, outer));  // Lw -> (sepPolicy:extra) L
        rules.add(ruleLWLsep(separator, extra, inner, outer));  // Lw -> L (sepPolicy:extra)
        rules.add(ruleLWsep(extra.symbol, outer));              // Lw -> (sepPolicy:extra)
    }

    return new RuleSymbol(outer.getType(),ct);
}
/** Builds the repetition rule {@code Lw -> L (sepPolicy:extra) L}. */
private Rule ruleLWLsepL(List separator,
        ListDecorator decorator, RuleSymbol rea, RuleSymbol rew)
{
    final Rule rule = new Rule(rew);
    rule.setType(Rule.Type.REPETITION);

    rule.add(rea);                              // leading L
    addExtraSep(rule, separator, decorator);    // extra content with its separators
    rule.add(rea);                              // trailing L

    return rule;
}
/** Builds the repetition rule {@code Lw -> (sepPolicy:extra) L}. */
private Rule ruleLWsepL(List separator,
        ListDecorator decorator, RuleSymbol rea, RuleSymbol rew)
{
    final Rule rule = new Rule(rew);
    rule.setType(Rule.Type.REPETITION);

    addExtraSep(rule, separator, decorator);    // extra content comes first
    rule.add(rea);                              // followed by L

    return rule;
}
/** Builds the repetition rule {@code Lw -> L (sepPolicy:extra)}. */
private Rule ruleLWLsep(List separator,
        ListDecorator decorator, RuleSymbol rea, RuleSymbol rew)
{
    final Rule rule = new Rule(rew);
    rule.setType(Rule.Type.REPETITION);

    rule.add(rea);                              // L comes first
    addExtraSep(rule, separator, decorator);    // followed by the extra content

    return rule;
}
/**
 * Builds the repetition rule {@code Lw -> (sepPolicy:extra)}: the extra
 * content symbol alone, without separators or delimiters.
 */
private Rule ruleLWsep(RuleSymbol extraRe, RuleSymbol rew)
{
    final Rule rule = new Rule(rew);
    rule.setType(Rule.Type.REPETITION);
    rule.add(extraRe);
    return rule;
}
/**
 * Builds the repetition rule {@code L -> E}. The element symbol is looked up
 * in the currently chosen symbol table and is also recorded as the rule's
 * element.
 */
private Rule ruleLE(LanguageElement el, RuleSymbol rea)
{
    final Rule rule = new Rule(rea);
    rule.setType(Rule.Type.REPETITION);

    RuleSymbol item = chosen.get(el);
    rule.add(item);
    rule.setElement(item);

    return rule;
}
/**
 * Builds the repetition rule {@code L -> E L}, with the separator delimiters
 * placed between the element and the recursive list symbol.
 */
private Rule ruleLEL(List separator, LanguageElement el, RuleSymbol re)
{
    final Rule rule = new Rule(re);
    rule.setType(Rule.Type.REPETITION);

    RuleSymbol item = chosen.get(el);
    rule.add(item);
    rule.setElement(item);

    addDelimiters(rule, separator);
    rule.add(re);

    return rule;
}
/**
 * Builds the repetition rule {@code Lsep -> sep E Lsep}: separator
 * delimiters, then the element, then the recursive tail symbol.
 */
private Rule ruleLSsepELS(List separator,
        LanguageElement el, RuleSymbol res)
{
    final Rule rule = new Rule(res);
    rule.setType(Rule.Type.REPETITION);

    addDelimiters(rule, separator);

    RuleSymbol item = chosen.get(el);
    rule.add(item);
    rule.setElement(item);

    rule.add(res);

    return rule;
}
/**
 * Builds the repetition rule {@code Lsep -> (sepPolicy:extra) E}: the extra
 * content, laid out according to its separator policy, followed by the
 * element.
 */
private Rule ruleLSsepE(List separator,
        LanguageElement el, ListDecorator metadata, RuleSymbol res)
{
    final Rule rule = new Rule(res);
    rule.setType(Rule.Type.REPETITION);

    addExtraSep(rule, separator, metadata);

    RuleSymbol item = chosen.get(el);
    rule.add(item);
    rule.setElement(item);

    return rule;
}
/**
 * Builds the repetition rule {@code L -> E Lsep}: the element followed by
 * the tail symbol.
 */
private Rule ruleLEsep(LanguageElement el, RuleSymbol res, RuleSymbol reb)
{
    final Rule rule = new Rule(reb);
    rule.setType(Rule.Type.REPETITION);

    RuleSymbol item = chosen.get(el);
    rule.add(item);
    rule.setElement(item);

    rule.add(res);

    return rule;
}
/**
 * Builds the repetition rule {@code L -> (sepPolicy:extra) E}: the extra
 * content, laid out according to its separator policy, followed by the
 * element.
 */
private Rule ruleLsepE(List separator,
        LanguageElement el, ListDecorator decorator, RuleSymbol reb)
{
    final Rule rule = new Rule(reb);
    rule.setType(Rule.Type.REPETITION);

    addExtraSep(rule, separator, decorator);

    RuleSymbol item = chosen.get(el);
    rule.add(item);
    rule.setElement(item);

    return rule;
}
/**
 * Appends the extra content of a list to a rule: its prefix delimiters, its
 * symbol and its suffix delimiters, surrounded by the list separator as the
 * separator policy dictates.
 *
 * <ul>
 *   <li>{@code AFTER}: separator, then the extra content;</li>
 *   <li>{@code BEFORE}: the extra content, then separator;</li>
 *   <li>{@code EXTRA}: separator on both sides;</li>
 *   <li>{@code REPLACE}: the extra content only.</li>
 * </ul>
 * Any other policy value leaves the rule untouched.
 *
 * @param r         the rule being built
 * @param separator the list separator
 * @param decorator the description of the extra content
 */
private void addExtraSep(Rule r, List separator, ListDecorator decorator)
{
    final boolean separatorFirst;
    final boolean separatorLast;

    switch (decorator.separatorPolicy) {
        case AFTER:
            separatorFirst = true;
            separatorLast = false;
            break;
        case BEFORE:
            separatorFirst = false;
            separatorLast = true;
            break;
        case EXTRA:
            separatorFirst = true;
            separatorLast = true;
            break;
        case REPLACE:
            separatorFirst = false;
            separatorLast = false;
            break;
        default:
            return;
    }

    if (separatorFirst) {
        addDelimiters(r, separator);
    }
    addDelimiters(r, decorator.prefix);
    r.add(decorator.symbol);
    addDelimiters(r, decorator.suffix);
    if (separatorLast) {
        addDelimiters(r, separator);
    }
}
/** Builds the copy rule {@code L0 -> L}. */
private Rule ruleL0L(RuleSymbol re, RuleSymbol re0)
{
    final Rule rule = new Rule(re0);
    rule.setType(Rule.Type.COPY);
    rule.add(re);
    return rule;
}
/** Builds the repetition rule {@code L0 -> epsilon} (empty right-hand side). */
private Rule ruleL0epsilon(RuleSymbol re0)
{
    final Rule emptyRule = new Rule(re0);
    emptyRule.setType(Rule.Type.REPETITION);
    return emptyRule;
}
}