All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.wiki.parser.LinkParser Maven / Gradle / Ivy

/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */

package org.apache.wiki.parser;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import java.util.stream.IntStream;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Attribute;

/**
 *  Parses JSPWiki-style "augmented" link markup into a Link object
 *  containing the link text, link reference, and any optional link
 *  attributes (as JDOM Attributes).
 *  

* The parser recognizes three link forms: *

*
    *
  1. [Text]
  2. *
  3. [Text | Link]
  4. *
  5. [Text | Link | attributes]
  6. *
*

* where the attributes are space-delimited, each in the form of *

*
 *      name1='value1' name2='value2' name3='value3' (etc.) 
*

* If the attribute parsing fails, the parser will still return the * basic link, writing a warning to the log. *

* *

Permitted Attributes

*

* Attributes that aren't declared on <a> or those that * permit scripting in HTML (as this is a security risk) are ignored * and have no effect on parsing, nor show up in the resulting attribute * list). The 'href' and 'name' attributes are also ignored as spurious. * The permitted list is: 'accesskey', 'charset', 'class', 'hreflang', * 'id', 'lang', 'dir', 'rel', 'rev', 'style' , 'tabindex', 'target' , * 'title', and 'type'. The declared attributes that will be ignored * are: 'href', 'name', 'shape', 'coords', 'onfocus', 'onblur', or any * of the other 'on*' event attributes. *

*

* The permitted attributes and target attribute values are static * String arrays ({@link #PERMITTED_ATTRIBUTES} and * {@link #PERMITTED_TARGET_VALUES} resp.) that could be compile-time * modified (i.e., predeclared). *

* *

Permitted Values on Target Attribute

*

* The following target names are reserved in HTML 4 and have special * meanings. These are the only values permitted by the parser. *

*
_blank
*
The user agent should load the designated document in a new, * unnamed window.
*
_self
*
The user agent should load the document in the same frame as * the element that refers to this target.
*
_parent
*
The user agent should load the document into the immediate * FRAMESET parent of the current frame. This value is equivalent to * _self if the current frame has no parent.
*
_top
*
The user agent should load the document into the full, * original window (thus canceling all other frames). This value is * equivalent to _self if the current frame has no parent.
*
* *

Returned Value

*

* This returns a Link object, a public inner class with methods: *

    *
  • getText() returns the link text.
  • *
  • getReference() returns the link reference value.
  • *
  • attributeCount() returns the number of declared attributes.
  • *
  • getAttributes() returns an iterator over any validated * XHTML-compliant attributes, returned as JDOM Attributes. *
  • *
*

* The attributeCount() method can be used to circumvent calling * getAttributes(), which will create an empty Iterator rather * than return a null. *

* *

Example: Link Form 1

*

* From an incoming wikitext link of: *

 *     [Acme] 
* returns: *
 *    getText():         "Acme"
 *    getReference():    "Acme"
 *    attributeCount():  0
 *    getAttributes():   an empty Iterator 
* *

Example: Link Form 2

*

* From an incoming wikitext link of: *

 *     [Acme | http://www.acme.com/] 
* returns: *
 *    getText():         "Acme"
 *    getReference():    "http://www.acme.com/"
 *    attributeCount():  0
 *    getAttributes():   an empty Iterator 
* *

Example: Link Form 3

*

* From an incoming wikitext link of: *

*
 *    [Acme | http://www.acme.com/ | id='foo' rel='Next'] 
* returns: *
 *    getText():         "Acme"
 *    getReference():    "http://www.acme.com/"
 *    attributeCount():  2
 *    getAttributes():   an Iterator containing:
 *      JDOM Attribute:  id="foo"
 *      JDOM Attribute:  rel="Next" 
* * * @since 2.5.10 */ public class LinkParser { private static final Logger LOG = LogManager.getLogger(LinkParser.class); /** Permitted attributes on links. Keep this sorted. */ private static final String[] PERMITTED_ATTRIBUTES = new String[] { "accesskey", "charset", "class", "dir", "hreflang", "id", "lang", "rel", "rev", "style", "tabindex", "target", "title", "type" }; /** Permitted values on the 'target' attribute. */ private static final String[] PERMITTED_TARGET_VALUES = new String[] { "_blank", "_self", "_parent", "_top" }; /** Links with target="_blank" can expose your site to performance and security issues. To fix, add rel="noopener" or rel="noreferrer" to these links. */ private static final String REL = "rel"; private static final String NOREFERRER = "noreferrer"; private static final String EQSQUO = "='"; private static final String SQUO = "'"; private static final String EQ = "="; private static final String TARGET = "target"; private static final String DELIMS = " \t\n\r\f="; private static final List< Attribute > m_EMPTY = new ArrayList< >(); // ............ /** * Processes incoming link text, separating out the link text, the link * URI, and then any specified attributes. * * @param linktext the wiki link text to be parsed * @return a Link object containing the link text, reference, and any valid Attributes * @throws ParseException if the parameter is null */ public Link parse(final String linktext ) throws ParseException { if( linktext == null ) { throw new ParseException("null value passed to link parser"); } Link link = null; try { // establish link text and link ref final int cut1 = linktext.indexOf('|'); if( cut1 == -1 ) { // link form 1: [Acme] return new Link( linktext ); } final int cut2 = cut1+1 < linktext.length() ? linktext.indexOf('|', cut1+1 ) : -1 ; if ( cut2 == -1 ) { // link form 2: [Acme | http://www.acme.com/] // text = Acme final String text = linktext.substring( 0, cut1 ).trim(); // ref = http://www.acme.com/ final String ref = linktext.substring( cut1+1 ).trim(); return new Link( text, ref ); } // link form 3: [Acme | http://www.acme.com/ | id='foo' rel='Next'] final String text = linktext.substring( 0, cut1 ).trim(); final String ref = linktext.substring( cut1+1, cut2 ).trim(); // attribs = id='foo' rel='Next' final String attribs = linktext.substring( cut2+1 ).trim(); link = new Link( text, ref ); // parse attributes // contains "='" that looks like attrib spec if(attribs.contains(EQSQUO)) { try { final StringTokenizer tok = new StringTokenizer(attribs,DELIMS,true); while ( tok.hasMoreTokens() ) { // get attribute name token String token = tok.nextToken(DELIMS).trim(); while ( isSpace(token) && tok.hasMoreTokens() ) { // remove all whitespace token = tok.nextToken(DELIMS).trim(); } // eat '=', break after '=' require( tok, EQ ); // eat opening delim require( tok, SQUO ); // using existing delim final String value = tok.nextToken(SQUO); // eat closing delim require( tok, SQUO ); if( token != null && value != null ) { if( Arrays.binarySearch( PERMITTED_ATTRIBUTES, token ) >= 0 ) { // _blank _self _parent _top if( !token.equals(TARGET) || Arrays.binarySearch( PERMITTED_TARGET_VALUES, value ) >= 0 ) { final Attribute a = new Attribute(token,value); link.addAttribute(a); if( token.equals(TARGET) ) { final Attribute rel = new Attribute(REL,NOREFERRER); link.addAttribute(rel); } } else { throw new ParseException("unknown target attribute value='" + value + "' on link"); } } else { throw new ParseException("unknown attribute name '" + token + "' on link"); } } else { throw new ParseException("unable to parse link attributes '" + attribs + "'"); } } } catch( final ParseException pe ) { LOG.warn("syntax error parsing link attributes '"+attribs+"': " + pe.getMessage()); } catch( final NoSuchElementException nse ) { LOG.warn("expected more tokens while parsing link attributes '" + attribs + "'"); } } } catch( final Exception e ) { LOG.warn( e.getClass().getName() + " thrown by link parser: " + e.getMessage() ); } return link; } private String require(final StringTokenizer tok, final String required ) throws ParseException, NoSuchElementException { final String s = tok.nextToken(required); if( !s.equals(required) ) { throw new ParseException("expected '"+required+"' not '"+s+"'"); } return s; } /** * Returns true if the String s is completely * composed of whitespace. * * @param s The string to check * @return True, if "s" is all XML whitespace. */ public static final boolean isSpace(final String s ) { return IntStream.range(0, s.length()).allMatch(i -> isSpace(s.charAt(i))); } /** * Returns true if char c is a member of * S (space) [XML 1.1 production 3]. * * @param c Character to check. * @return True, if the character is an XML space. */ public static final boolean isSpace(final char c ) { // 0x20 = SPACE, 0x0A = LF, 0x0D = CR, 0x09 = TAB, 0x85 = NEL, 0x2028 = Line separator return 0x20 == c || 0x0A == c || 0x0D == c || 0x09 == c || 0x85 == c || 0x2028 == c; } // ......................................................................... /** * Inner class serving as a struct containing the parsed * components of a link. */ public static class Link { private String m_text; private String m_ref; private int m_interwikiPoint = -1; private List m_attribs; /** * Create a new Link with text but no reference. * @param text The link text. * @throws ParseException If the link text is illegal. */ protected Link(final String text ) throws ParseException { setText(text); } /** * Create a new link with a given text and hyperlink (reference). * * @param text The link text. * @param ref The hypertext reference. * @throws ParseException If the link text or reference are illegal. */ protected Link(final String text, final String ref ) throws ParseException { setText(text); setReference(ref); } /** * Sets the link text. * * @param text The link text. * @throws ParseException If the text is illegal (e.g. null). */ protected void setText(final String text ) throws ParseException { if( text == null ) { throw new ParseException("null link text"); } m_text = text; } /** * Returns the link text. * * @return Link text. */ public String getText() { return m_text; } /** * Sets the hypertext reference. Typically, this is an URI or an interwiki link, * or a wikilink. * * @param ref The reference. * @throws ParseException If the reference is illegal. */ protected void setReference(final String ref ) throws ParseException { if( ref == null ) { throw new ParseException("null link reference value"); } m_ref = ref; } /** * Returns true, if there is a reference. * * @return True, if there's a reference; false otherwise. */ public boolean hasReference() { return m_ref != null; } /** * Returns the link reference, or the link text if null. * * @return A link reference. */ public String getReference() { return m_ref != null ? m_ref : m_text ; } /** * Returns true, if this Link represents an InterWiki link (of the form wiki:page). * * @return True, if this Link represents an InterWiki link. */ public boolean isInterwikiLink() { final LinkParsingOperations lpo = new LinkParsingOperations( null ); if( !hasReference() ) m_ref = m_text; m_interwikiPoint = lpo.interWikiLinkAt( m_ref ); return lpo.isInterWikiLink( m_ref ); } /** * Returns the name of the wiki if this is an interwiki link. *
         *    Link link = new Link("Foo","Wikipedia:Foobar");
         *    assert( link.getExternalWikiPage(), "Wikipedia" );
         *  
* * @return Name of the wiki, or null, if this is not an interwiki link. */ public String getExternalWiki() { if( isInterwikiLink() ) { return m_ref.substring( 0, m_interwikiPoint ); } return null; } /** * Returns the wikiname part of an interwiki link. Used only with interwiki links. *
         *    Link link = new Link("Foo","Wikipedia:Foobar");
         *    assert( link.getExternalWikiPage(), "Foobar" );
         *  
* * @return Wikiname part, or null, if this is not an interwiki link. */ public String getExternalWikiPage() { if( isInterwikiLink() ) { return m_ref.substring( m_interwikiPoint+1 ); } return null; } /** * Returns the number of attributes on this link. * * @return The number of attributes. */ public int attributeCount() { return m_attribs != null ? m_attribs.size() : 0 ; } /** * Adds another attribute to the link. * * @param attr A JDOM Attribute. */ public void addAttribute(final Attribute attr ) { if( m_attribs == null ) { m_attribs = new ArrayList<>(); } m_attribs.add(attr); } /** * Returns an Iterator over the list of JDOM Attributes. * * @return Iterator over the attributes. */ public Iterator< Attribute > getAttributes() { return m_attribs != null ? m_attribs.iterator() : m_EMPTY.iterator() ; } /** * Returns a wikitext string representation of this Link. * @return WikiText. */ @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append( '[' ); sb.append( m_text ); if( m_ref != null ) { sb.append( ' ' ); sb.append( '|' ); sb.append( ' ' ); sb.append( m_ref ); } if( m_attribs != null ) { sb.append( ' ' ); sb.append( '|' ); final Iterator< Attribute > it = getAttributes(); while ( it.hasNext() ) { final Attribute a = it.next(); sb.append( ' ' ); sb.append( a.getName() ); sb.append( '=' ); sb.append( '\'' ); sb.append( a.getValue() ); sb.append( '\'' ); } } sb.append( ']' ); return sb.toString(); } } // end inner class }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy