com.threerings.gwt.util.WikiUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of gwt-utils Show documentation
Utilities for use in developing GWT applications.
The newest version!
//
// $Id$
//
// OOO GWT Utils - utilities for creating GWT applications
// Copyright (C) 2009-2010 Three Rings Design, Inc., All Rights Reserved
// http://code.google.com/p/ooo-gwt-utils/
//
// This library is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation; either version 2.1 of the License, or
// (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package com.threerings.gwt.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Used by {@link WikiParser}.
 *
 * @author Yaroslav Stavnichiy ([email protected])
 */
public class WikiUtils
{
    /**
     * Tells whether the given string is a whitespace-free URI that matches
     * {@code URL_REGEX} in its entirety.
     *
     * <p>The whitespace test is done with {@code \S*} against the whole string. The former
     * {@code ".*\\s.*"} test could be defeated by input containing two or more line
     * terminators, because {@code .} does not match line terminators while the negated
     * character classes in {@code URL_REGEX} do.
     *
     * @param uri the candidate string; must not be null
     * @return true if {@code uri} contains no whitespace and matches the URL pattern
     */
    public static boolean isURI (String uri) {
        // String.matches is used (rather than a compiled Pattern) to keep to the String API
        // this class already relies on.
        return uri.matches("\\S*") && uri.matches(URL_REGEX);
    }

    /**
     * Pattern used by {@link #isURI}. Pieces, in order: scheme, optional user info, host with
     * optional port, path segments, optional query, optional hash.
     */
    private final static String URL_REGEX =
        "\\w+://" +            // pcol://
        "([^:@]+(:[^@]+)@)?" + // (username:password@) -- NOTE(review): inside this group the
                               // ":password" part is mandatory; a bare "username@" still gets
                               // through because the hostname class below also admits '@'
        "[^:/]+(:\\d+)?" +     // hostname(:port)
        "(/[^/#?]+)*/?" +      // (/pathseg(/pathseg))(/)
        "(\\?[^#]*)?(#.*)?";   // (?query)(#hash)

    /**
     * Tells whether the given string starts with a scheme followed by "://".
     *
     * @param uri the candidate string; must not be null
     * @return true if the whole string matches "one or more word characters, then ://, then
     *         anything the regex dot accepts"
     */
    public static boolean isAbsoluteURI (String uri) {
        // TODO: make this more sophisticated if needed
        final String schemeThenAnything = "\\w+://.*";
        final boolean absolute = uri.matches(schemeThenAnything);
        return absolute;
    }

    /**
     * Tells whether a character is accepted as part of a URL: either a latin letter or digit
     * (see {@link #isLatinLetterOrDigit}) or one of a fixed set of punctuation characters.
     *
     * @param c the character to test
     * @return true if {@code c} is an accepted URL character
     */
    public static boolean isUrlChar(char c) {
        // Sources consulted for the punctuation set:
        //   MediaWiki: "._\\/~%-+&#?!=()@"
        //   http://www.ietf.org/rfc/rfc2396.txt :
        //     reserved:   ";/?:@&=+$,"
        //     unreserved: "-_.!~*'()"
        //     delim:      "%#"
        // The apostrophe is deliberately left out of the accepted set.
        final String acceptedPunctuation = "/?@&=+,-_.!~()%#;:$*";
        return isLatinLetterOrDigit(c) || acceptedPunctuation.indexOf(c) != -1;
    }

    /**
     * Tells whether a character is one of the ASCII digits 0-9 or ASCII letters A-Z / a-z.
     *
     * @param c the character to test
     * @return true for an ASCII letter or digit, false for everything else
     */
    public static boolean isLatinLetterOrDigit(char c) {
        if ('0' <= c && c <= '9') {
            return true;
        }
        if ('A' <= c && c <= 'Z') {
            return true;
        }
        return 'a' <= c && c <= 'z';
    }

    /**
     * Filters text so there are no '\r' chars in it ("\r\n" -> "\n"; then "\r" -> "\n").
     * Most importantly makes all blank lines (lines with only spaces) exactly like this: "\n\n".
     * WikiParser relies on that.
     */
    public static String preprocessWikiText(String text) {
        if (text==null) return "";
        text=text.trim();
        int length=text.length();
        char[] chars=new char[length];
        text.getChars(0, length, chars, 0);
        StringBuilder sb=new StringBuilder();
        boolean blankLine=true;
        StringBuilder spaces=new StringBuilder();
        for (int p=0; p "\n"; then "\r" -> "\n"
                if (p+1 unent = getHtmlUnEntities();
        for (int ii = 0, ll = s.length(); ii < ll; ii++) {
            char ch = s.charAt(ii);
            String ent = unent.get(ch);
            if (ent != null) {
                sb.append("&").append(ent).append(";");
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    public static String unescapeHTML(String value) {
        if (value==null) return null;
        if (value.indexOf('&')<0) return value;
        Map ent = getHtmlEntities();
        StringBuffer sb=new StringBuffer();
        final int length=value.length();
        for (int i=0; ii && i1-i<=12) {
                    if (value.charAt(i+1)=='#') {
                        if (value.charAt(i+2)=='x') {
                            ce=(char)atoi(value.substring(i+3, i1), 16);
                        }
                        else {
                            ce=(char)atoi(value.substring(i+2, i1));
                        }
                    }
                    else {
                        synchronized (ent) {
                            Character ceObj=ent.get(value.substring(i+1, i1));
                            ce=ceObj==null?0:ceObj.charValue();
                        }
                    }
                }
                if (ce>0) {
                    sb.append(ce);
                    i=i1;
                }
                else sb.append(c);
            }
            else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Parses a decimal integer, returning 0 instead of throwing when the text is not a valid
     * integer.
     *
     * @param s the text to parse; null is treated like any other unparsable input
     * @return the parsed value, or 0 if {@code s} cannot be parsed
     */
    public static int atoi(String s) {
        try {
            return Integer.parseInt(s);
        }
        catch (NumberFormatException ignored) {
            // Deliberate best-effort: unparsable input maps to 0. Only the parse failure is
            // caught so that unrelated Errors are no longer swallowed.
            return 0;
        }
    }

    /**
     * Parses an integer written in the given radix, returning 0 instead of throwing when the
     * text is not a valid integer in that radix.
     *
     * @param s the text to parse; null is treated like any other unparsable input
     * @param base the radix to parse in; an unsupported radix also yields 0
     * @return the parsed value, or 0 if {@code s} cannot be parsed
     */
    public static int atoi(String s, int base) {
        try {
            return Integer.parseInt(s, base);
        }
        catch (NumberFormatException ignored) {
            // Deliberate best-effort: unparsable input maps to 0. Only the parse failure is
            // caught so that unrelated Errors are no longer swallowed.
            return 0;
        }
    }

    /**
     * Replaces every occurrence of a literal substring (no regular expressions involved).
     * Occurrences are found left to right and do not overlap; replaced text is not rescanned.
     *
     * @param str the text to search; must not be null unless {@code from} is empty
     * @param from the literal text to look for; must not be null. An empty string matches
     *        nothing, so {@code str} is returned unchanged (the search loop could never
     *        advance past an empty match and would otherwise grow the result without bound)
     * @param to the replacement text
     * @return {@code str} with each occurrence of {@code from} replaced by {@code to}
     */
    public static String replaceString(String str, String from, String to) {
        final int flen = from.length();
        if (flen == 0) {
            return str;
        }
        StringBuilder buf = new StringBuilder(str.length());
        int i1 = 0, i2;
        while ((i2 = str.indexOf(from, i1)) >= 0) {
            buf.append(str, i1, i2); // text between the previous match and this one
            buf.append(to);
            i1 = i2 + flen;
        }
        buf.append(str, i1, str.length()); // tail after the last match
        return buf.toString();
    }

    /**
     * Splits a string on a single separator character. Empty pieces are kept, including
     * leading and trailing ones, so the result always has one more element than the number
     * of separators in the input.
     *
     * @param s the text to split; may be null
     * @param separator the character to split on
     * @return the pieces in order, or null if {@code s} is null
     */
    public static String[] split(String s, char separator) {
        // this is meant to be faster than String.split() when separator is not regexp
        if (s==null) return null;
        // typed list, so that toArray(T[]) yields a String[] rather than an Object[]
        ArrayList<String> parts=new ArrayList<String>();
        int beginIndex=0, endIndex;
        while ((endIndex=s.indexOf(separator, beginIndex))>=0) {
            parts.add(s.substring(beginIndex, endIndex));
            beginIndex=endIndex+1;
        }
        parts.add(s.substring(beginIndex)); // whatever follows the last separator
        return parts.toArray(new String[parts.size()]);
    }

    /**
     * Translates all non-basic-latin-letters characters into latin ones for use in URLs etc.
     * Here is the implementation for cyrillic (Russian) alphabet. Unknown characters are omitted.
     *
     * @param s string to be translated
     * @return translated string
     */
    public static String translit(String s) {
        if (s==null) return "";
        StringBuilder sb=new StringBuilder(s.length()+100);
        final int length=s.length();
        final int translitTableLength=translitTable.length();

        for (int i=0; i getHtmlEntities () {
        if (_entities == null) {
            _entities = new HashMap();
            _entities.put("lt", '<');
            _entities.put("gt", '>');
            _entities.put("amp", '&');
            _entities.put("quot", '"');
            _entities.put("apos", '\'');
            _entities.put("nbsp", '\u00A0');
            _entities.put("shy", '\u00AD');
            _entities.put("copy", '\u00A9');
            _entities.put("reg", '\u00AE');
            _entities.put("trade", '\u2122');
            _entities.put("mdash", '\u2014');
            _entities.put("ndash", '\u2013');
            _entities.put("ldquo", '\u201C');
            _entities.put("rdquo", '\u201D');
            _entities.put("euro", '\u20AC');
            _entities.put("middot", '\u00B7');
            _entities.put("bull", '\u2022');
            _entities.put("laquo", '\u00AB');
            _entities.put("raquo", '\u00BB');
        }
        return _entities;
    }

    /**
     * Returns the reverse of the table supplied by {@code getHtmlEntities()}: a map from
     * character to entity name (without the surrounding '&amp;' and ';'). The map is built on
     * the first call and the same instance is returned afterwards.
     *
     * @return the shared character-to-entity-name map
     */
    private static synchronized Map<Character, String> getHtmlUnEntities () {
        if (_unentities == null) {
            // Typed locals so the entry iteration is well-formed regardless of how the
            // sibling declarations are parameterized.
            Map<String, Character> entities = getHtmlEntities();
            Map<Character, String> reversed = new HashMap<Character, String>();
            for (Map.Entry<String, Character> entry : entities.entrySet()) {
                reversed.put(entry.getValue(), entry.getKey());
            }
            _unentities = reversed;
        }
        return _unentities;
    }

    /** Entity name to character; created on first use by getHtmlEntities(). */
    private static Map<String, Character> _entities;

    /** Character to entity name; created on first use by getHtmlUnEntities(). */
    private static Map<Character, String> _unentities;

    /**
     * Transliteration table for the Russian alphabet: each Cyrillic letter is immediately
     * followed by its Latin replacement (zero or more Latin letters; the hard and soft signs
     * have none). Lowercase letters come first, then uppercase, both in alphabet order with
     * IO placed right after IE.
     *
     * <p>The Cyrillic keys are written as Unicode escapes so the table survives any source
     * encoding. They were reconstructed from alphabet order after every key had been turned
     * into the replacement character U+FFFD, which made every lookup miss.
     * NOTE(review): confirm the key order against the lookup code in translit().
     */
    private static final String translitTable =
        "\u0430a\u0431b\u0432v\u0433g\u0434d\u0435e\u0451e\u0436zh\u0437z\u0438i\u0439y\u043ak" +
        "\u043bl\u043cm\u043dn\u043eo\u043fp\u0440r\u0441s\u0442t\u0443u\u0444f\u0445h\u0446ts" +
        "\u0447ch\u0448sh\u0449sch\u044a\u044by\u044c\u044de\u044eyu\u044fya\u0410A\u0411B\u0412V" +
        "\u0413G\u0414D\u0415E\u0401E\u0416ZH\u0417Z\u0418I\u0419Y\u041aK\u041bL\u041cM\u041dN" +
        "\u041eO\u041fP\u0420R\u0421S\u0422T\u0423U\u0424F\u0425H\u0426TS\u0427CH\u0428SH" +
        "\u0429SCH\u042a\u042bY\u042c\u042dE\u042eYU\u042fYA";
}