All downloads are free. The search and download functionality uses the official Maven repository.

it.unibz.inf.ontop.utils.R2RMLIRISafeEncoder Maven / Gradle / Ivy

package it.unibz.inf.ontop.utils;

import com.google.common.collect.ImmutableBiMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class R2RMLIRISafeEncoder {

    private static final Logger log = LoggerFactory.getLogger(R2RMLIRISafeEncoder.class);

    /**
     * This table is used for IRI safe encoding according to
     * 

* R2RML *

* The IRI-safe version of a string is obtained by applying the following transformation to any character that is not in the iunreserved production in [RFC3987]: *

    *
  • Convert the character to a sequence of one or more octets using UTF-8 [RFC3629]
  • *
  • Percent-encode each octet [RFC3986]
  • *
*

* RFC 3987 2.2. ABNF for IRI References and IRIs *

     * iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
     * ucschar  = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
     *            / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
     *            / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
     *            / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
     *            / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
     *            / %xD0000-DFFFD / %xE1000-EFFFD
     * 
*

* * We only implement the encoding for the range of basic latin (\u0020 - \u007F) for performance reason. * Other symbols outside of `iunreserved` are mostly control symbols. */ public static final ImmutableBiMap TABLE = ImmutableBiMap.builder() .put("%25", '%') // IMPORTANT: % should be first because the SQL query generated will replace it first .put("%20", ' ') .put("%21", '!') .put("%22", '\"') .put("%23", '#') .put("%24", '$') .put("%26", '&') .put("%27", '\'') .put("%28", '(') .put("%29", ')') .put("%2A", '*') .put("%2B", '+') .put("%2C", ',') // "%2D", "-" iunreserved // "%2E", "." iunreserved .put("%2F", '/') // "0" - "9" .put("%3A", ':') .put("%3B", ';') .put("%3C", '<') .put("%3D", '=') .put("%3E", '>') .put("%3F", '?') .put("%40", '@') // "A" - "Z" .put("%5B", '[') .put("%5C", '\\') .put("%5D", ']') .put("%5E", '^') // "%5F", "_" iunreserved .put("%60", '`') // "a" - "z" .put("%7B", '{') .put("%7C", '|') .put("%7D", '}') // "%7E", "~" iunreserved // .put("%7F", "\u007F") // DEL .build(); /* * percent encoding for a String */ public static String encode(String s) { return StringUtils.encode(s, TABLE.inverse()); } /*** * Given a string representing an IRI, this method will return a String * in which all percent encoded characters (e.g., %20) will * be restored to their original characters (e.g., ' '). */ public static String decode(String encoded) { return StringUtils.decode(encoded, '%', 3, TABLE, (code) -> log.warn("Error decoding an encoded IRI {} (problematic code: {}).", encoded, code)); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy