All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.integratedmodelling.soil.wrb.WRBParser Maven / Gradle / Ivy

The newest version!
package org.integratedmodelling.soil.wrb;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.integratedmodelling.api.knowledge.IConcept;
import org.integratedmodelling.common.utils.jtopas.ReaderSource;
import org.integratedmodelling.common.utils.jtopas.StandardTokenizer;
import org.integratedmodelling.common.utils.jtopas.StandardTokenizerProperties;
import org.integratedmodelling.common.utils.jtopas.Token;
import org.integratedmodelling.common.utils.jtopas.Tokenizer;
import org.integratedmodelling.exceptions.KlabValidationException;
import org.integratedmodelling.soil.wrb.WRBIdentity.WRBConcept;
import org.integratedmodelling.soil.wrb.vocabulary.WRBVocabulary;

/**
 * Main WRB parser in charge of the conversion string -> WRB identity.
 * 
 * 
 * Production rules for WRB grammar:
 * 
 * TERMINALS: 
 * 		Q (qualifier term, second level, from vocabulary)
 * 		S (specifier term, from vocabulary)
 * 		G (group term, first level, from vocabulary)
 * 
 * PRODUCTIONS:
 * 	
 * 		P := S?Q            // prefix
 * 		X := (S'-')?G'ic'   // group used as qualifier, with potential specifier
 * 		QualifierList := '(' P (',' P)* ')'? 
 * 		QualifiedGroupList := '(' X (',' X)* ')'
 * 		Model  := P* G (QualifierList? ~ QualifiedGroupList?)
 *  
 * 
* * @author Ferd * */ public class WRBParser { static boolean initialized = false; /* * Some sample specifications * from SIS:SoilProfile at http://93.63.35.107:8080/geoserver (field should be WRB2006 but this is in xl_en_4952) */ static String[] samples = { "Endoleptic Regosols", "Haplic Vertisols (Eutric, Endoskeletic)", "Haplic Cambisols (Eutric)", "Placic Phaeozems" }; /** * Parse a WRB string into the intermediate representation. * * @param definition * @return * @throws KlabValidationException */ public static WRBIdentity parse(String definition) throws KlabValidationException { StandardTokenizerProperties properties = new StandardTokenizerProperties(); properties.setSeparators("(),"); try (Tokenizer tokenizer = new StandardTokenizer(properties)) { tokenizer.setSource(new ReaderSource(new StringReader(definition))); WRBIdentity ret = new WRBIdentity(); while (tokenizer.hasMoreTokens()) { Token token = tokenizer.nextToken(); if (token.getType() == token.EOF) { break; } if (token.getImage().equals("(")) { ret.openSpecifierGroup(); } else if (token.getImage().equals(")")) { ret.closeSpecifierGroup(); } else if (!token.getImage().equals(",")) { ret.addToken(token.getImage()); } } ret.closeParsing(); return ret; } } /** * Parse a (valid) intermediate representation into a fully qualified concept. * * @param identity * @return */ public static IConcept conceptualize(WRBIdentity identity) { return null; } /** * Chomp away specifiers from the front of the term until no more modifiers are * present, then present all together as a WRBConcept. No validation other than * term hunting. * * NOTE: allows more specifiers but grammar allows only one. Left here for * validation - parser must check number of specifiers and can produce informative * error if invalid. * * @param term * @return * @throws KlabValidationException */ public static WRBIdentity.WRBConcept chomp(String term) throws KlabValidationException { WRBIdentity.WRBConcept ret = new WRBConcept(); List specs = new ArrayList<>(); /* * FIXME only one spec please */ while (true) { boolean found = false; for (String s : WRBVocabulary.get().SpecifierTerms.keySet()) { if (term.toUpperCase().startsWith(s.toUpperCase())) { term = term.substring(s.length()); specs.add(s); found = true; break; } } if (!found) { if (!WRBVocabulary.get().isQualifier(term)) { throw new KlabValidationException("qualifier " + StringUtils.capitalize(term.toLowerCase()) + " is unknown"); } ret.setMainConcept(term); break; } } if (specs.size() > 0) { ret.setSpecifiers(specs); } return ret; } /** * Return group correspondent to passed token, allowing case mismatch and arbitrary suffix. * * @param token * @return */ public static String getGroupTerm(String token) { for (String s : WRBVocabulary.get().RSGTerms.keySet()) { if (token.toLowerCase().startsWith(s.toLowerCase())) { return s; } } return null; } public static void main(String[] args) throws Exception { for (String d : samples) { System.out.println("Parsing '" + d + "':"); WRBIdentity def = parse(d); System.out.println("Reconstructed definition: " + def + "; concept ID: " + def.getShortId()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy