org.sweble.wikitext.lazy.parser.InternalLink.rats Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of swc-parser-lazy Show documentation
Show all versions of swc-parser-lazy Show documentation
A parser for MediaWiki's Wikitext.
/**
* Copyright 2011 The Open Source Research Group,
* University of Erlangen-Nürnberg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*!
*
* Internal Link or Image
* ----------------------
*
* Grammar:
* - '[[' PageTitle LinkOption* LinkTitle ']]' LinkPostfix
*
* - PageTitle can contain spaces but no newlines.
* - If a newline occurs before the closing brackets or the first pipe
* character, the internal link is not recognized as such!
*
* Internal Link
* -------------
*
* AST node:
* Name : InternalLink
* Extends : InnerNode.InnerNode1
* Implements : Cloneable
* Constructor : "prefix, target, title, postfix"
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_INTERNAL_LINK
*
* Children:
* title : LinkTitle
*
* Properties:
* target : String
* prefix : String
* postfix : String
*
* Body:
* {
* @Override
* protected Object clone()
* {
* try
* {
* return super.clone();
* }
* catch (CloneNotSupportedException e)
* {
* assert false;
* return null;
* }
* }
* }
*
* Image Link
* ----------
*
* AST node:
* Name : ImageLink
* Extends : InnerNode.InnerNode2
* Constructor : "target, options, title, format, border, hAlign, vAlign, width, height, upright, linkPage, linkUrl, alt"
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_IMAGE_LINK
*
* Children:
* options : NodeList
* title : LinkTitle
*
* Properties:
* target : String
* width : int
* height : int
* upright : boolean
* hAlign : ImageHorizAlign
* vAlign : ImageVertAlign
* format : ImageViewFormat
* border : boolean
* linkPage : String
* linkUrl : Url
* alt : LinkOptionAltText
*
* Link Option: Resize
* -------------------
*
* Grammar:
* - LinkOptionResize ::= Ws* Digit+ Space* 'px' Ws*
* - LinkOptionResize ::= Ws* Digit+ 'x' Digit* Space* 'px' Ws*
* - LinkOptionResize ::= Ws* 'x' Digit+ Space* 'px' Ws*
*
* - If heightString is null the first production matched.
* - If widthString is null the third production matched.
* - If both are null there is no information which production matched.
* - Otherwise the second production matched.
* - If one of the dimensions is missing the value of the width/height is set
* to -1.
*
* AST node:
* Name : LinkOptionResize
* Extends : LeafNode
* Constructor : "width, height"
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_LINK_OPTION_RESIZE
*
* Properties:
* width : int
* height : int
*
* Link Option: Keyword
* --------------------
*
* Grammar:
* - LinkOptionKeyword ::= Ws* LinkOptionKeywordString Ws*
*
* AST node:
* Name : LinkOptionKeyword
* Extends : LeafNode
* Constructor : "keyword"
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_LINK_OPTION_KEYWORD
*
* Properties:
* keyword : String
*
* Link Option: LinkTarget
* -----------------------
*
* Grammar:
* - LinkOptionLinkTarget ::= Ws* 'link=' LinkTarget Ws*
* - LinkOptionLinkTarget ::= Ws* 'link=' Url Ws*
*
* AST node:
* Name : LinkOptionLinkTarget
* Extends : InnerNode.InnerNode1
* Constructor : target
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_LINK_OPTION_LINK_TARGET
*
* Children:
* target : AstNode
*
* Link Option: AltText
* --------------------
*
* Grammar:
* - LinkOptionAltText ::= Ws* 'alt=' LinkTitleContent{ALT}*
*
* AST node:
* Name : LinkOptionAltText
* Extends : ContentNode
* Constructor : content
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_LINK_OPTION_ALT_TEXT
*
* Link Option: Garbage
* --------------------
*
* Grammar:
* - LinkOptionGarbage ::= LinkTitleContent{ALT}* &'|'
*
* AST node:
* Name : LinkOptionGarbage
* Extends : StringContentNode
* Constructor : content
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_LINK_OPTION_GARBAGE
*
* Link Title
* ----------
*
* Grammar:
* - LinkTitle ::= LinkTitleContent{TITLE}*
*
* LinkTitleContent can contain:
* - Newline
* - Tables
* - Headings
* - Horizontal rules
* - Block level elements
* - Signature
* - InternalLink
* - ExternalLink
* - PlainExternalLink
* - Ticks
* - XmlReference
* - XmlElement
* - ParserEntity
*
* The LinkTitleContent cannot contain:
* - MagicWord
*
* AST node:
* Name : LinkTitle
* Extends : ContentNode
* Constructor : content
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_LINK_TITLE
*
*/
module org.sweble.wikitext.lazy.parser.InternalLink;
import org.sweble.wikitext.lazy.utils.Numbers;
import org.sweble.wikitext.lazy.utils.ParserCharSequence;
import org.sweble.wikitext.lazy.utils.Warnings;
import org.sweble.wikitext.lazy.utils.XmlReference;
import org.sweble.wikitext.lazy.parser.Content;
import org.sweble.wikitext.lazy.parser.LinkTarget;
import org.sweble.wikitext.lazy.parser.State;
import org.sweble.wikitext.lazy.parser.Url;
import org.sweble.wikitext.lazy.parser.Whitespace;
// -- Header / Body / Footer ---------------------------------------------------
body
{
/**
 * Tries to match the internal link postfix pattern of the parser state at
 * the given input position.
 *
 * @param start position at which the calling production started; not used
 * @param base input position at which the match is attempted
 * @return a SemanticValue with the matched postfix and the position behind
 *         it, or with the empty string and the unchanged position base when
 *         no pattern is available or the pattern does not match at base
 */
Result parseLinkPostfix(int start, int base)
{
    final Pattern postfixPattern = yyState.getInternalLinkPostfixPattern();
    if (postfixPattern != null)
    {
        final Matcher postfixMatcher =
                postfixPattern.matcher(new ParserCharSequence(base));
        // lookingAt only accepts a match that begins at the start of the
        // sequence, so the match end is an offset relative to base.
        if (postfixMatcher.lookingAt())
        {
            return new SemanticValue(
                    postfixMatcher.group(),
                    base + postfixMatcher.end());
        }
    }
    // No pattern or no match: empty postfix, nothing consumed.
    return new SemanticValue("", base);
}
/**
 * Moves a link prefix from the end of the given text onto the given link.
 *
 * The internal link prefix pattern of the parser state is searched in the
 * content of text. When its first capturing group is non-empty, as many
 * characters as the group is long are removed from the end of the text and
 * the group is set as prefix on a clone of link. If round trip data is
 * gathered the prefix is also prepended to the round trip data of the clone.
 *
 * When no prefix pattern is available, or the first group did not take part
 * in the match, the text and link are returned unchanged instead of failing
 * with a NullPointerException.
 *
 * @param text text node whose trailing characters may form the prefix
 * @param link link that may receive the prefix; the instance itself is not
 *             modified, a clone is
 * @return a NodeList of the shortened text and the prefixed clone, or of the
 *         original text and link when no prefix applies
 */
private AstNode addLinkPrefix(Text text, InternalLink link)
{
    final String t = text.getContent();

    // shortcut (prefix cannot contain whitespace)
    if (t.isEmpty() || Character.isWhitespace(t.charAt(t.length() - 1)))
        return new NodeList(text, link);

    // Same treatment as the postfix pattern in parseLinkPostfix: a missing
    // pattern simply means that there is nothing to split off.
    final Pattern prefixPattern = getState().getInternalLinkPrefixPattern();
    if (prefixPattern == null)
        return new NodeList(text, link);

    final Matcher matcher = prefixPattern.matcher(t);
    if (matcher.find())
    {
        // group(1) yields null when the group did not take part in the match
        final String p = matcher.group(1);
        if (p != null && !p.isEmpty())
        {
            final String b = t.substring(0, t.length() - p.length());

            final InternalLink l = (InternalLink) link.clone();
            l.setPrefix(p);

            if (isGatherRtData())
                prependRtData(l, p);

            return new NodeList(new Text(b), l);
        }
    }

    return new NodeList(text, link);
}
/**
 * Converts a string of digits into an int, using negative sentinels for the
 * cases in which no number is available.
 *
 * @param num the text to convert, may be null
 * @return the parsed value, -1 when num is null, or -2 when num cannot be
 *         parsed as an int (for example because it is out of range)
 */
private int toInt(String num)
{
    int value = -1;
    if (num != null)
    {
        try
        {
            value = Integer.parseInt(num);
        }
        catch (NumberFormatException e)
        {
            // Not representable as int: report the second sentinel.
            value = -2;
        }
    }
    return value;
}
/**
 * Convenience accessor for the link builder held by the parser state.
 *
 * @return whatever the parser state currently returns as its link builder
 */
private LinkBuilder getLinkBuilder()
{
    final LinkBuilder builder = getState().getLinkBuilder();
    return builder;
}
/**
 * Asks the current link builder whether the link target is an image target.
 *
 * @return the answer of the link builder
 */
private boolean isImageTarget()
{
    final LinkBuilder builder = getLinkBuilder();
    return builder.isImageTarget();
}
/**
 * Asks the current link builder whether the link target is valid.
 *
 * @return the answer of the link builder
 */
private boolean isValidTarget()
{
    final LinkBuilder builder = getLinkBuilder();
    return builder.isValidTarget();
}
}
// -- Internal link --[ State Aware Memoization ]-------------------------------
// Entry point for internal links. Fetches the StateAwareResult produced by
// InternalLinkMemoized for this position and asks it for the result that
// belongs to the current parser context. Only when no result is stored for
// that context is InternalLinkTransient parsed again and its result stored.
noinline transient AstNode InternalLink =
^{
StateAwareResult r = (StateAwareResult) pInternalLinkMemoized(yyBase);
final LazyParserContext context = getContext();
Result yyResult = r.getResult(context);
if (yyResult == null)
yyResult = r.setResult(context, pInternalLinkTransient(yyBase));
// NOTE(review): returnTrue presumably always yields true and only exists to
// keep the code generated behind this action reachable - confirm.
if (returnTrue(r))
return yyResult;
}
;
// Memoized companion of InternalLink. Parses InternalLinkTransient at this
// position and wraps the outcome, together with the current parser context,
// in a StateAwareResult labelled InternalLink.
noinline memoized AstNode InternalLinkMemoized =
^{
Result yyResult = new StateAwareResult("InternalLink", getContext(), pInternalLinkTransient(yyBase));
// NOTE(review): returnTrue presumably always yields true and only exists to
// keep the code generated behind this action reachable - confirm.
if (returnTrue(yyResult))
return yyResult;
}
;
// -- Internal link ------------------------------------------------------------
// The actual, non-memoized link production. First choice: two opening
// brackets, the INTERNAL_LINK atom must be accepted, then InternalLinkChoice
// supplies the value. If that fails, a single opening bracket is consumed
// and returned as plain Text.
noinline transient AstNode InternalLinkTransient =
"[[" &{ accept(ParserAtoms.INTERNAL_LINK) } yyValue:InternalLinkChoice
/ "["
{
yyValue = new Text("[");
}
;
// Recognizes the input at which inline content inside a link has to stop.
// Which sequences count as a stopper depends on the scope that is active:
//   INTERNAL_LINK_ALT   : closing brackets, a nested internal link, or a pipe
//   INTERNAL_LINK_TITLE : closing brackets or a nested internal link
//   IMAGE_LINK_ALT      : closing brackets or a pipe
//   IMAGE_LINK_TITLE    : closing brackets only
// A nested link only counts when InternalLinkChoice succeeds after the two
// opening brackets.
inline void InlineContentStopperInternalLink =
&{ inScope(ParserScopes.INTERNAL_LINK_ALT) } ( "]]" / "[[" InternalLinkChoice / '|' )
/ &{ inScope(ParserScopes.INTERNAL_LINK_TITLE) } ( "]]" / "[[" InternalLinkChoice )
/ &{ inScope(ParserScopes.IMAGE_LINK_ALT) } ( "]]" / '|' )
/ &{ inScope(ParserScopes.IMAGE_LINK_TITLE) } ( "]]" )
;
// Parses everything behind the two opening brackets of a link: the target,
// options and title, the closing brackets and an optional postfix. The link
// builder of the parser state is initialized with the target before the
// target is validated, and finally builds the resulting node. If this
// regular form does not match and warnings are enabled, the recovery
// alternatives of InternalLinkAutoCorrect are tried.
private transient stateful AstNode InternalLinkChoice =
rt0:pTpStar target:LinkTargetString rt1:pExtSpaceStar
{
getState().initLinkBuilder(target);
}
&{ isValidTarget() } options:OptionsAndTitle "]]" postfix:LinkPostfix?
{
// A missing postfix is represented by the empty string, never by null.
if (postfix == null)
postfix = "";
yyValue = getState().getLinkBuilder().build(options, postfix);
if (isGatherRtData())
{
// Image links receive one more round trip slot than plain internal links;
// the additional middle slot is left empty here.
if (isImageTarget())
{
addRtData(yyValue,
joinRt("[[", rt0, target, rt1),
null,
joinRt("]]", postfix));
}
else
{
addRtData(yyValue,
joinRt("[[", rt0, target, rt1),
joinRt("]]", postfix));
}
}
}
/ &{ isWarningsEnabled() } InternalLinkAutoCorrect
;
// Recovery alternatives for input that started like a link but could not be
// parsed as one. Tried in this order:
//   1. Auto correction enabled for WS_INFO and a valid target follows: build
//      the link from the target alone and file a warning about the missing
//      closing brackets.
//   2. Warning level WS_INFO enabled and a valid target follows: only look
//      ahead at the target, yield the two opening brackets as Text and file
//      the same warning.
//   3. Warning level WS_NONE enabled: yield the two opening brackets as Text
//      and file a warning about the missing target.
// The warning spans start at yyStart - 2, presumably so that they include
// the two opening brackets consumed by the caller before this production.
private inline AstNode InternalLinkAutoCorrect =
    &{ isAutoCorrect(WS_INFO) }
    rt0:pTpStar target:LinkTargetString
    {
      getState().initLinkBuilder(target);
    }
    &{ isValidTarget() }
    {
      // Pass the empty string as postfix, exactly as InternalLinkChoice does
      // when no postfix was parsed, so that a built link never carries a
      // null postfix.
      yyValue = getState().getLinkBuilder().build(new NodeList(), "");

      if (isGatherRtData())
      {
        // Image links receive one more round trip slot than plain links.
        if (isImageTarget())
        {
          addRtData(yyValue, joinRt("[[", rt0, target), null, null);
        }
        else
        {
          addRtData(yyValue, joinRt("[[", rt0, target), null);
        }
      }

      fileLooksLikeWarning(
          yyValue,
          makeSpan(yyStart - 2, yyResult),
          WS_INFO,
          "Internal Link",
          "the finishing `]]' is missing");
    }
  / &{ isWarningLevelEnabled(WS_INFO) }
    &( pTpStar target:LinkTargetString
      {
        getState().initLinkBuilder(target);
      }
      &{ isValidTarget() }
    )
    {
      yyValue = new Text("[[");

      fileLooksLikeWarning(
          yyValue,
          makeSpan(yyStart - 2, yyPredResult),
          WS_INFO,
          "Internal Link",
          "the finishing `]]' is missing");
    }
  / &{ isWarningLevelEnabled(WS_NONE) }
    {
      yyValue = new Text("[[");

      fileLooksLikeWarning(
          yyValue,
          makeSpan(yyStart - 2, yyStart),
          WS_NONE,
          "Internal Link",
          "a target link is missing");
    }
  ;
// -- Options ------------------------------------------------------------------
// Everything between the link target and the closing brackets.
// For a target that is not an image only an optional title is parsed and
// the value of the production is null. For an image target the value is the
// list produced by LinkOptionStar, again followed by an optional title.
// The title itself is not part of the value; LinkTitle hands it to the
// link builder.
private inline NodeList OptionsAndTitle =
&{ !isImageTarget() } LinkTitle?
{
yyValue = null;
}
/ yyValue:LinkOptionStar LinkTitle?
;
// Zero or more link options, each introduced by a pipe character. The pipe
// is voided, so only the values of LinkOptionChoice are collected and
// wrapped into a NodeList.
private transient NodeList LinkOptionStar =
options:( void:'|' LinkOptionChoice )*
{
yyValue = new NodeList(options);
}
;
// A single link option; the alternatives are tried in the order given.
// A resize or keyword option is only accepted when it is directly followed
// by a pipe or by the closing brackets. Garbage is only accepted when it is
// followed by a pipe, i.e. when another option or the title comes after it.
private inline AstNode LinkOptionChoice =
LinkOptionResize &( '|' / "]]" )
/ LinkOptionKeyword &( '|' / "]]" )
/ LinkOptionNameValue
/ LinkOptionGarbage &'|'
;
// -- Link Option Resize -------------------------------------------------------
// Size option of an image link in one of three forms: width only,
// width x optional height, or x height, each terminated by px.
// The parsed dimensions are handed to the link builder. A dimension that is
// absent is represented by -1; a dimension that cannot be converted by
// toInt yields -2, which is passed on to the builder after a warning has
// been added. A LinkOptionResize node is only created when round trip data
// is gathered; otherwise the value of the production is null.
private LinkOptionResize LinkOptionResize =
    rt0:pExtWsStar width:DigitPlus rt1:pSpaceStar "px" rt2:pExtWsStar
    {
      final int pxWidth = toInt(width);
      if (pxWidth == -2)
      {
        getLinkBuilder().addWarning(
            makeOddSyntaxWarning(
                makeSpan(yyStart, yyResult),
                WS_INFO,
                "Invalid size format"));
      }

      getLinkBuilder().setWidth(pxWidth);

      if (isGatherRtData())
      {
        yyValue = new LinkOptionResize(pxWidth, -1);
        addRtData(yyValue, joinRt('|', rt0, width, rt1, "px", rt2));
      }
      else
      {
        yyValue = null;
      }
    }
  / rt0:pExtWsStar width:DigitPlus 'x' height:DigitPlus? rt1:pSpaceStar "px" rt2:pExtWsStar
    {
      final int pxWidth = toInt(width);
      final int pxHeight = toInt(height);
      if (pxWidth == -2 || pxHeight == -2)
      {
        getLinkBuilder().addWarning(
            makeOddSyntaxWarning(
                makeSpan(yyStart, yyResult),
                WS_INFO,
                "Invalid size format"));
      }

      getLinkBuilder().setWidth(pxWidth);
      getLinkBuilder().setHeight(pxHeight);

      if (isGatherRtData())
      {
        yyValue = new LinkOptionResize(pxWidth, pxHeight);
        addRtData(yyValue, joinRt('|', rt0, width, 'x', height, rt1, "px", rt2));
      }
      else
      {
        yyValue = null;
      }
    }
  / rt0:pExtWsStar 'x' height:DigitPlus rt1:pSpaceStar "px" rt2:pExtWsStar
    {
      final int pxHeight = toInt(height);
      if (pxHeight == -2)
      {
        getLinkBuilder().addWarning(
            makeOddSyntaxWarning(
                makeSpan(yyStart, yyResult),
                WS_INFO,
                "Invalid size format"));
      }

      getLinkBuilder().setHeight(pxHeight);

      if (isGatherRtData())
      {
        yyValue = new LinkOptionResize(-1, pxHeight);
        addRtData(yyValue,joinRt('|', rt0, 'x', height, rt1, "px", rt2));
      }
      else
      {
        yyValue = null;
      }
    }
  ;
// -- Link Option Keyword ------------------------------------------------------
// Keyword option of an image link, optionally surrounded by whitespace.
// The keyword is offered to the link builder from within a predicate, so
// the production only matches when addKeyword reports success. A
// LinkOptionKeyword node is only created when round trip data is gathered;
// otherwise the value of the production is null.
private LinkOptionKeyword LinkOptionKeyword =
    rt0:pExtWsStar keyword:LinkOptionKeywordString &{ getLinkBuilder().addKeyword(keyword) } rt1:pExtWsStar
    {
      if (isGatherRtData())
      {
        final LinkOptionKeyword option = new LinkOptionKeyword(keyword);
        addRtData(option, joinRt('|', rt0, keyword, rt1));
        yyValue = option;
      }
      else
      {
        yyValue = null;
      }
    }
  ;
// The text of a keyword candidate: one or more ASCII letters.
private transient String LinkOptionKeywordString =
[A-Za-z]+
;
// -- Link Option Name Value ---------------------------------------------------
// Options of the form name=value.
//   link= : the value is parsed by LinkTargetOverride and set as link on the
//           link builder. A LinkOptionLinkTarget node is only created when
//           round trip data is gathered; otherwise the value is null.
//   alt=  : the value is parsed as link title content inside the
//           INTERNAL_LINK_ALT scope. A LinkOptionAltText node is always
//           created and set as alt text on the link builder.
// NOTE(review): the alt branch always enters INTERNAL_LINK_ALT, whereas
// LinkOptionGarbage selects IMAGE_LINK_ALT for image targets. The two scopes
// stop inline content differently (see InlineContentStopperInternalLink).
// Confirm that the difference is intended.
private transient stateful AstNode LinkOptionNameValue =
rt0:pExtWsStar "link=" target:LinkTargetOverride rt1:pExtWsStar
{
getLinkBuilder().setLink(target);
yyValue = null;
if (isGatherRtData())
{
yyValue = new LinkOptionLinkTarget(target);
addRtData(yyValue,
joinRt('|', rt0, "link="),
joinRt(rt1));
}
}
/ rt0:pExtWsStar "alt="
{
enter(ParserScopes.INTERNAL_LINK_ALT);
}
alt:LinkTitleContentStar
{
yyValue = new LinkOptionAltText(alt);
getLinkBuilder().setAlt((LinkOptionAltText) yyValue);
if (isGatherRtData())
{
addRtData(yyValue,
joinRt('|', rt0, "alt="),
null);
}
}
// TODO: Support page=...
;
// Value of a link= option: a Url is tried first, then a LinkTarget.
private inline AstNode LinkTargetOverride =
Url
/ LinkTarget
;
// -- Link Option Garbage ------------------------------------------------------
// Option text that none of the other option productions accepted. The text
// is parsed as link title content inside the ALT scope that matches the
// target type. A LinkOptionGarbage node holding the consumed input is only
// created when round trip data is gathered; otherwise the value is null.
private inline stateful LinkOptionGarbage LinkOptionGarbage =
    {
      if (isImageTarget())
        enter(ParserScopes.IMAGE_LINK_ALT);
      else
        enter(ParserScopes.INTERNAL_LINK_ALT);
    }
    LinkTitleContentStar
    {
      if (isGatherRtData())
      {
        final LinkOptionGarbage garbage =
            new LinkOptionGarbage(difference(yyStart, yyResult));
        addRtData(garbage, joinRt('|', garbage.getContent()));
        yyValue = garbage;
      }
      else
      {
        yyValue = null;
      }
    }
  ;
// -- Link Title ---------------------------------------------------------------
// The title of a link: a pipe followed by link title content, parsed inside
// the TITLE scope that matches the target type. The resulting LinkTitle node
// is set as title on the link builder and is also the value of the
// production.
private inline stateful LinkTitle LinkTitle =
    {
      if (isImageTarget())
        enter(ParserScopes.IMAGE_LINK_TITLE);
      else
        enter(ParserScopes.INTERNAL_LINK_TITLE);
    }
    '|' title:LinkTitleContentStar
    {
      final LinkTitle parsedTitle = new LinkTitle(title);
      getLinkBuilder().setTitle(parsedTitle);

      if (isGatherRtData())
      {
        addRtData(parsedTitle, joinRt('|'));
      }

      yyValue = parsedTitle;
    }
  ;
// -- Link Title Content -------------------------------------------------------
// Content of a link title, alt text or garbage option. Delegates entirely to
// InlineBlockContent; where the content ends is governed by the scope that
// the calling production entered.
private inline NodeList LinkTitleContentStar =
InlineBlockContent
;
// -- Link Postfix -------------------------------------------------------------
// Text directly behind the closing brackets that belongs to the link.
// Only attempted for targets that are not images. The matching itself is
// done in Java by parseLinkPostfix from the body section, which never fails
// but may yield the empty string.
private noinline transient String LinkPostfix =
&{ !isImageTarget() }
^{
Result yyResult = parseLinkPostfix(yyStart, yyBase);
}
;
// -- End of file --------------------------------------------------------------