// All downloads are free. The search and download functionality uses the official Maven repository.
//
// ai.platon.pulsar.common.urls.UrlUtils.kt (Maven / Gradle / Ivy)
//
// There is a newer version: 2.1.0
// Show newest version
package ai.platon.pulsar.common.urls

import ai.platon.pulsar.common.config.AppConstants.INTERNAL_URL_PREFIX
import org.apache.commons.lang3.StringUtils
import org.apache.http.client.utils.URIBuilder
import java.net.MalformedURLException
import java.net.URI
import java.net.URISyntaxException
import java.net.URL

object UrlUtils {

    /**
     * Check whether [url] is an internal URL, i.e. whether it starts with [INTERNAL_URL_PREFIX].
     *
     * Internal URLs identify internal resources and are never fetched from the internet.
     *
     * @param url The url to test
     * @return true if [url] starts with the internal URL prefix, false otherwise
     */
    @JvmStatic
    fun isInternal(url: String): Boolean {
        return url.startsWith(INTERNAL_URL_PREFIX)
    }

    /**
     * Check whether [url] is a regular (non-internal) URL, i.e. whether it does not start with
     * [INTERNAL_URL_PREFIX].
     *
     * Internal URLs identify internal resources and are never fetched from the internet.
     *
     * @param url The url to test
     * @return true if [url] does not start with the internal URL prefix, false otherwise
     */
    @JvmStatic
    fun isNotInternal(url: String): Boolean {
        return !url.startsWith(INTERNAL_URL_PREFIX)
    }

    /**
     * Create a [URL] from its string representation, returning null instead of throwing on failure.
     *
     * @param spec the string to parse as a URL, may be null
     * @return the URL parsed from [spec], or null if [spec] is null or blank, or if the [URL] constructor
     *         rejects it with a [MalformedURLException] (for example because no protocol is specified
     *         or the protocol is unknown)
     * @see java.net.URL
     */
    @JvmStatic
    fun getURLOrNull(spec: String?): URL? {
        if (spec.isNullOrBlank()) {
            return null
        }

        // Only a malformed spec is an expected failure; anything else (e.g. a JVM Error) is not swallowed.
        return try {
            URL(spec)
        } catch (e: MalformedURLException) {
            null
        }
    }

    /**
     * Test whether [str] is a standard URL, i.e. whether [getURLOrNull] can parse it.
     *
     * @param str The string to test, may be null
     * @return true if the given string is a standard URL, false otherwise
     */
    @JvmStatic
    fun isStandard(str: String?): Boolean = getURLOrNull(str) != null

    /**
     * Normalize a url spec.
     *
     * The input may carry a trailing argument list (see [splitUrlArgs]); only the url part is used.
     * The fragment (the part introduced by "#", e.g. `http://java.sun.com/index.html#chapter1`)
     * is always removed, and the query string is removed as well when [ignoreQuery] is true.
     *
     * @param url
     *        The url to normalize; a trailing argument list is allowed and will be removed
     * @param ignoreQuery
     *        If true, the resulting url does not contain a query string
     * @return The normalized URL
     * @throws URISyntaxException
     *         If the given string violates RFC 2396
     * @throws MalformedURLException
     * @throws IllegalArgumentException
     */
    @JvmStatic
    @Throws(URISyntaxException::class, IllegalArgumentException::class, MalformedURLException::class)
    fun normalize(url: String, ignoreQuery: Boolean = false): URL {
        // Only the url part matters here; the argument list, if any, is dropped.
        val spec = splitUrlArgs(url).first

        val builder = URIBuilder(spec).setFragment(null)
        if (ignoreQuery) {
            builder.removeQuery()
        }

        return builder.build().toURL()
    }

    /**
     * Normalize a url spec, falling back to an empty string when normalization fails.
     *
     * The fragment is removed, and the query string is removed as well when [ignoreQuery] is true.
     *
     * @param url
     *        The url to normalize; a trailing argument list is allowed and will be removed
     * @param ignoreQuery
     *        If true, the resulting url does not contain a query string
     * @return The normalized url, or an empty string ("") if the url can not be normalized
     */
    @JvmStatic
    fun normalizeOrEmpty(url: String, ignoreQuery: Boolean = false): String {
        return normalizeOrNull(url, ignoreQuery) ?: ""
    }

    /**
     * Normalize a url spec, returning null when normalization fails.
     *
     * The fragment is removed, and the query string is removed as well when [ignoreQuery] is true.
     *
     * @param url
     *        The url to normalize; a trailing argument list is allowed and will be removed
     * @param ignoreQuery
     *        If true, the resulting url does not contain a query string
     * @return The normalized url, or null if [normalize] throws any exception
     */
    @JvmStatic
    fun normalizeOrNull(url: String, ignoreQuery: Boolean = false): String? = try {
        normalize(url, ignoreQuery).toString()
    } catch (ignored: Exception) {
        null
    }

    /**
     * Normalize a collection of url specs.
     *
     * Each url is normalized with [normalizeOrNull]: the fragment is removed, and the query string is
     * removed as well when [ignoreQuery] is true. Urls that can not be normalized are silently dropped,
     * so the result may be shorter than the input.
     *
     * @param urls
     *        The urls to normalize; a trailing argument list is allowed on each url and will be removed
     * @param ignoreQuery
     *        If true, the resulting urls do not contain a query string
     * @return The normalized urls, in input order
     */
    @JvmStatic
    fun normalizeUrls(urls: Iterable<String>, ignoreQuery: Boolean = false): List<String> {
        return urls.mapNotNull { normalizeOrNull(it, ignoreQuery) }
    }

    /**
     * Split the query parameters of a url into a name-to-value map.
     *
     * If a parameter name occurs more than once, the value of the last occurrence wins.
     *
     * @param url The url to split
     * @return The query parameters of the url, or an empty map if there are none
     * @throws URISyntaxException if [url] can not be parsed
     */
    @Throws(URISyntaxException::class)
    fun splitQueryParameters(url: String): Map<String, String> {
        return URIBuilder(url).queryParams?.associate { it.name to it.value } ?: mapOf()
    }

    /**
     * Get the value of the first query parameter named [parameterName] in a url.
     *
     * @param url The url to inspect
     * @param parameterName The name of the query parameter
     * @return The value of the first matching query parameter, or null if there is no such parameter
     * @throws URISyntaxException if [url] can not be parsed
     */
    @Throws(URISyntaxException::class)
    fun getQueryParameters(url: String, parameterName: String): String? {
        val params = URIBuilder(url).queryParams ?: return null
        return params.firstOrNull { it.name == parameterName }?.value
    }

    /**
     * Remove the named query parameters from a url, keeping all the others.
     *
     * @param url The url to process
     * @param parameterNames The names of the query parameters to remove
     * @return The url without the given query parameters
     * @throws URISyntaxException if [url] can not be parsed
     */
    @Throws(URISyntaxException::class)
    fun removeQueryParameters(url: String, vararg parameterNames: String): String {
        val builder = URIBuilder(url)
        val remaining = builder.queryParams.filterNot { it.name in parameterNames }
        builder.setParameters(remaining)
        return builder.build().toString()
    }

    /**
     * Keep only the named query parameters of a url and remove all the others.
     *
     * @param url The url to process
     * @param parameterNames The names of the query parameters to keep
     * @return The url containing only the given query parameters
     * @throws URISyntaxException if [url] can not be parsed
     */
    @Throws(URISyntaxException::class)
    fun keepQueryParameters(url: String, vararg parameterNames: String): String {
        val builder = URIBuilder(url)
        val kept = builder.queryParams.filter { it.name in parameterNames }
        builder.setParameters(kept)
        return builder.build().toString()
    }

    /**
     * Resolve a (possibly relative) target url against a base url, with special handling for targets
     * that consist of a query only.
     *
     * Example of the special case: the page
     * http://careers3.accenture.com/Careers/ASPX/Search.aspx?co=0&sk=0 contains links of the form
     * href="?co=0&sk=0&pg=1". According to the original note, resolving these with [URL] directly
     * yields http://careers3.accenture.com/Careers/ASPX/?co=0&sk=0&pg=1, dropping the Search.aspx
     * segment, while browsers keep it. Such targets are therefore handed to [fixPureQueryTargets].
     *
     * @param base      base url
     * @param targetUrl target url (may be relative); surrounding whitespace is trimmed
     * @return resolved absolute url.
     * @throws MalformedURLException
     */
    @Throws(MalformedURLException::class)
    @JvmStatic
    fun resolveURL(base: URL, targetUrl: String): URL {
        val target = targetUrl.trim()

        // Anything that is not a pure query is resolved by URL itself.
        if (!target.startsWith("?")) {
            return URL(base, target)
        }

        return fixPureQueryTargets(base, target)
    }

    /**
     * Resolve a pure-query target (one starting with "?") against [base] while keeping the last path
     * segment of the base url. Handles the case in RFC3986 section 5.4.1 example 7, and similar.
     *
     * Targets that do not start with "?" are resolved by [URL] without any adjustment.
     *
     * @param base      base url
     * @param targetUrl target url; surrounding whitespace is trimmed
     * @return resolved absolute url.
     */
    private fun fixPureQueryTargets(base: URL, targetUrl: String): URL {
        val query = targetUrl.trim()
        if (!query.startsWith("?")) {
            return URL(base, query)
        }

        // Re-attach the right-most path segment of the base ("" when the path has no '/'),
        // so that resolving the relative spec does not drop it.
        val lastSegment = base.path.substringAfterLast('/', missingDelimiterValue = "")
        return URL(base, lastSegment + query)
    }

    /**
     * Split a configured url into its url part and its argument part.
     *
     * The input is trimmed, then every carriage return, line feed and tab character is removed
     * (they are deleted, not treated as separators). The first remaining whitespace character
     * separates the url from the arguments.
     *
     * @param configuredUrl url and args in `$url $args` format
     * @return url and args pair, both trimmed; the args part is empty if there are no arguments
     */
    @JvmStatic
    fun splitUrlArgs(configuredUrl: String): Pair<String, String> {
        val spec = configuredUrl.trim().replace("[\\r\\n\\t]".toRegex(), "")

        val pos = spec.indexOfFirst { it.isWhitespace() }
        if (pos < 0) {
            // No whitespace left at all: the whole spec is the url.
            return spec to ""
        }

        return spec.substring(0, pos).trim() to spec.substring(pos).trim()
    }

    /**
     * Merge a url and an optional argument list into a single configured url.
     *
     * @param url  url; surrounding whitespace is trimmed
     * @param args args; when null or blank, only the trimmed url is returned
     * @return url and args in `$url $args` format
     */
    @JvmStatic
    fun mergeUrlArgs(url: String, args: String? = null): String {
        val trimmedUrl = url.trim()
        if (args.isNullOrBlank()) {
            return trimmedUrl
        }
        return trimmedUrl + " " + args.trim()
    }

    /**
     * Get the url without its query parameters.
     *
     * The url is parsed as a [URI] and rebuilt from its scheme, authority, path and fragment;
     * only the query part is dropped, so a fragment, if present, is retained.
     *
     * @param url url
     * @return url without query parameters, or an empty string if the url can not be parsed or rebuilt
     */
    @JvmStatic
    fun getUrlWithoutParameters(url: String): String {
        return try {
            val uri = URI(url)
            // Passing null as the query drops the query part of the input url.
            URI(uri.scheme, uri.authority, uri.path, null, uri.fragment).toString()
        } catch (e: Exception) {
            // Best effort: a url that can not be processed yields an empty string.
            // JVM Errors are deliberately not caught here.
            ""
        }
    }
    
    /**
     * Returns the (optionally normalized) url together with its key, which is the reversed url.
     *
     * When [norm] is true and the url can not be normalized, the url becomes an empty string.
     * When the resulting url can not be reversed, the key is an empty string.
     *
     * @param originalUrl the url to process
     * @param norm if true, the url is normalized with [normalizeOrNull] before the key is computed
     * @return the (normalized) url and its key
     */
    @JvmStatic
    fun normalizedUrlAndKey(originalUrl: String, norm: Boolean = false): Pair<String, String> {
        val url = if (norm) normalizeOrNull(originalUrl).orEmpty() else originalUrl
        return url to reverseUrlOrEmpty(url)
    }

    /**
     * Reverses a url's domain. This form is better for storing in hbase. Because
     * scans within the same domain are faster.
     *
     * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
     * "com.foo.bar:http:8983/to/index.html?a=b" (reversed host, then protocol, then port).
     *
     * @param url url to be reversed
     * @return Reversed url
     * @throws MalformedURLException if [url] can not be parsed as a [URL]
     */
    @JvmStatic
    fun reverseUrl(url: String): String = reverseUrl(URL(url))
    
    /**
     * Reverses a url's domain. This form is better for storing in hbase. Because
     * scans within the same domain are faster.
     *
     * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
     * "com.foo.bar:http:8983/to/index.html?a=b" (reversed host, then protocol, then port).
     *
     * @param url url to be reversed
     * @return Reversed url, or an empty string if [url] is malformed
     */
    @JvmStatic
    fun reverseUrlOrEmpty(url: String): String {
        return try {
            reverseUrl(URL(url))
        } catch (e: MalformedURLException) {
            // A malformed url has no reversed form; signal this with an empty string.
            ""
        }
    }

    /**
     * Reverses a url's domain. This form is better for storing in hbase. Because
     * scans within the same domain are faster.
     *
     * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
     * "com.foo.bar:http:8983/to/index.html?a=b" (reversed host, then protocol, then port).
     *
     * @param url url to be reversed
     * @return Reversed url, or null if [url] is malformed
     */
    @JvmStatic
    fun reverseUrlOrNull(url: String): String? {
        return try {
            reverseUrl(URL(url))
        } catch (e: MalformedURLException) {
            // A malformed url has no reversed form; signal this with null.
            null
        }
    }

    /**
     * Reverses a url's domain. This form is better for storing in hbase. Because scans within the same domain are
     * faster.
     *
     * The result is built as: reversed host, ':' and the protocol, ':' and the port (only when the url
     * has an explicit port), then the file part (path and query), prefixed with '/' if it is non-empty
     * and does not already start with one.
     *
     * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes "com.foo.bar:http:8983/to/index.html?a=b".
     *
     * @param url url to be reversed
     * @return Reversed url
     */
    @JvmStatic
    fun reverseUrl(url: URL): String {
        val file = url.file
        val port = url.port

        return buildString {
            // host, with its dot-separated parts in reverse order
            reverseAppendSplits(url.host, this)

            append(':').append(url.protocol)

            // -1 means that the url carries no explicit port
            if (port != -1) {
                append(':').append(port)
            }

            // make sure the file part is separated from the head by a slash
            if (file.isNotEmpty() && file[0] != '/') {
                append('/')
            }
            append(file)
        }
    }

    /**
     * Get the reversed and tenanted form of [unreversedUrl], which may or may not be tenanted already.
     *
     * The input is split with `TenantedUrl.split`, its url part is reversed, and the result is combined
     * with [tenantId], so the tenant id of the original url might change.
     *
     * Zero tenant id means no tenant.
     *
     * @param tenantId the tenant id to combine the reversed url with
     * @param unreversedUrl the unreversed url, can be both tenanted or not tenanted
     * @return the tenanted and reversed url of unreversedUrl
     */
    @JvmStatic
    fun reverseUrl(tenantId: Int, unreversedUrl: String): String {
        val plainUrl = TenantedUrl.split(unreversedUrl).url
        val reversed = reverseUrl(plainUrl)
        return TenantedUrl.combine(tenantId, reversed)
    }
    
    /**
     * Get the unreversed url of a reversed url.
     *
     * The reversed url is expected to look like `reversedHost:protocol[:port][/path...]`, which is the
     * format produced by [reverseUrl]. An input whose head contains no ':' fails with an index
     * exception, since the protocol part is then missing.
     *
     * @param reversedUrl the reversed url
     * @return the unreversed url of reversedUrl
     */
    @JvmStatic
    fun unreverseUrl(reversedUrl: String): String {
        // Everything before the first '/' is the "host:protocol[:port]" head, the rest is kept verbatim.
        val slash = reversedUrl.indexOf('/')
        val headEnd = if (slash >= 0) slash else reversedUrl.length
        val head = reversedUrl.substring(0, headEnd)
        val tail = reversedUrl.substring(headEnd)

        // parts = {reversed host, protocol[, port]}
        val parts = StringUtils.splitPreserveAllTokens(head, ':')

        val out = StringBuilder(reversedUrl.length + 2)
        out.append(parts[1]).append("://")
        // parts[0] is the reversed host; reversing it again restores the original host
        reverseAppendSplits(parts[0], out)
        if (parts.size == 3) {
            // a third part is present only when the url has a port
            out.append(':').append(parts[2])
        }
        out.append(tail)

        return out.toString()
    }

    /**
     * Get the unreversed url of a reversed url, returning null instead of throwing on failure.
     *
     * @param reversedUrl the reversed url
     * @return the unreversed url of reversedUrl, or null if [unreverseUrl] fails for any reason
     */
    @JvmStatic
    fun unreverseUrlOrNull(reversedUrl: String): String? {
        return kotlin.runCatching { unreverseUrl(reversedUrl) }.getOrNull()
    }

    /**
     * Get the unreversed and tenanted url of [reversedUrl], which may or may not be tenanted already.
     *
     * The input is split with `TenantedUrl.split`, its url part is unreversed, and the result is combined
     * with [tenantId], so the tenant id of the original url might change.
     *
     * @param tenantId    the expected tenant id of the reversedUrl
     * @param reversedUrl the reversed url, can be both tenanted or not tenanted
     * @return the unreversed, tenanted url of reversedUrl
     */
    @JvmStatic
    fun unreverseUrl(tenantId: Int, reversedUrl: String): String {
        val plainUrl = TenantedUrl.split(reversedUrl).url
        val unreversed = unreverseUrl(plainUrl)
        return TenantedUrl.combine(tenantId, unreversed)
    }

    /**
     * Get the start key for a tenanted table.
     *
     * Without a url, the key only restricts the range to the tenant space: it is the tenant id
     * as a string, or null when the tenant id is zero.
     *
     * @param tenantId the tenant id
     * @param unreversedUrl unreversed key, which is the original url; may be null
     * @return the reversed and tenanted key with its lower-bound marker decoded
     */
    @JvmStatic
    fun getStartKey(tenantId: Int, unreversedUrl: String?): String? {
        if (unreversedUrl != null) {
            return reverseUrl(tenantId, decodeKeyLowerBound(unreversedUrl))
        }

        // restricted within tenant space
        return when (tenantId) {
            0 -> null
            else -> tenantId.toString()
        }
    }

    /**
     * Get the start key for a non-tenanted table.
     *
     * @param unreversedUrl unreversed key, which is the original url; may be null
     * @return the reversed key with its lower-bound marker decoded, or null if [unreversedUrl] is null
     */
    @JvmStatic
    fun getStartKey(unreversedUrl: String?): String? {
        return unreversedUrl?.let { reverseUrl(decodeKeyLowerBound(it)) }
    }

    /**
     * Get the end key for a non-tenanted table.
     *
     * @param unreversedUrl unreversed key, which is the original url; may be null
     * @return the reversed key with its upper-bound marker decoded, or null if [unreversedUrl] is null
     */
    @JvmStatic
    fun getEndKey(unreversedUrl: String?): String? {
        return unreversedUrl?.let { reverseUrl(decodeKeyUpperBound(it)) }
    }

    /**
     * Get the end key for a tenanted table.
     *
     * Without a url, the key only restricts the range to the tenant space: it is the next tenant id
     * (`tenantId + 1`) as a string, or null when the tenant id is zero.
     *
     * @param tenantId the tenant id
     * @param unreversedUrl unreversed key, which is the original url; may be null
     * @return the reversed and tenanted key with its upper-bound marker decoded
     */
    @JvmStatic
    fun getEndKey(tenantId: Int, unreversedUrl: String?): String? {
        if (unreversedUrl != null) {
            return reverseUrl(tenantId, decodeKeyUpperBound(unreversedUrl))
        }

        // restricted within tenant space
        return when (tenantId) {
            0 -> null
            else -> (tenantId + 1).toString()
        }
    }

    /**
     * Decode the textual forms of the lower key bound into the real character.
     *
     * The unicode character U+0001 is used as the lower key bound, but clients usually send it
     * encoded as text: a backslash followed by `u0001`, or the same with a doubled backslash.
     * All these textual forms are replaced by the single character U+0001; a key that already
     * contains the raw character is returned unchanged.
     *
     * Note, the character is displayed as +0001> in some output systems.
     *
     * @param startKey the key that may contain an encoded lower bound
     * @return the key with every encoded lower bound replaced by the character U+0001
     */
    @JvmStatic
    fun decodeKeyLowerBound(startKey: String): String {
        // One or two literal backslashes followed by "u0001". Matching both forms in a single pass
        // avoids leaving a stray backslash behind for the double-escaped form.
        return startKey.replace(Regex("""\\{1,2}u0001"""), "\u0001")
    }

    /**
     * Decode the textual forms of the upper key bound into the real character.
     *
     * The unicode character U+FFFF is used as the upper key bound, but clients usually send it
     * encoded as text: a backslash followed by `uFFFF`, or the same with a doubled backslash.
     * All these textual forms are replaced by the single character U+FFFF; a key that already
     * contains the raw character is returned unchanged.
     *
     * Note, the character may display as +FFFF> in some output systems.
     *
     * @param endKey the key that may contain an encoded upper bound
     * @return the key with every encoded upper bound replaced by the character U+FFFF
     */
    @JvmStatic
    fun decodeKeyUpperBound(endKey: String): String {
        // One or two literal backslashes followed by "uFFFF". Matching both forms in a single pass
        // avoids leaving a stray backslash behind for the double-escaped form.
        return endKey.replace(Regex("""\\{1,2}uFFFF"""), "\uFFFF")
    }

    /**
     * Given a reversed url, returns the reversed host, E.g.
     * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar"
     *
     * The reversed host is everything before the first ':'. If the input contains no ':' at all,
     * the whole input is returned instead of failing with an index exception.
     *
     * @param reversedUrl Reversed url
     * @return Reversed host
     */
    @JvmStatic
    fun getReversedHost(reversedUrl: String): String {
        return reversedUrl.substringBefore(':')
    }

    /**
     * Reverse the order of the dot-separated parts of a host name.
     *
     * @param hostName host name
     * @return reversed host name
     */
    @JvmStatic
    fun reverseHost(hostName: String): String = buildString { reverseAppendSplits(hostName, this) }

    /**
     * Restore a host name from its reversed form.
     *
     * Implemented by calling [reverseHost] again, which reverses the order of the parts once more.
     *
     * @param reversedHostName reversed host name
     * @return host name
     */
    @JvmStatic
    fun unreverseHost(reversedHostName: String): String = reverseHost(reversedHostName)
    
    /**
     * Split [string] on '.', and append the parts to [buf] in reverse order, joined by '.'.
     *
     * If the split yields no parts at all, [string] is appended unchanged.
     *
     * @param string the dot-separated string, e.g. a host name
     * @param buf the buffer to append the reversed form to
     */
    private fun reverseAppendSplits(string: String, buf: StringBuilder) {
        val parts = StringUtils.split(string, '.')
        if (parts.isEmpty()) {
            buf.append(string)
            return
        }

        parts.reversed().joinTo(buf, separator = ".")
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy