org.sweble.wikitext.lazy.parser.Content.rats Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of swc-parser-lazy Show documentation
Show all versions of swc-parser-lazy Show documentation
A parser for MediaWiki's Wikitext.
/**
* Copyright 2011 The Open Source Research Group,
* University of Erlangen-Nürnberg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*!
*
* Content
* -------
*
* Overview:
* - First see if it's something special (a link, a magic word, ...)
* - If not, it's just text
*
* Closer look:
* - Special things always have their unique prefix.
* - If such a prefix is encountered in the text, the rule for the
* corresponding thing MUST match. Either because we really found
* something special, or because the rule just consumed the prefix as
* plain text to let the parser continue with its work.
*
* Even closer look:
* - This "must match" requirement helps us escape the content if a prefix
* of a closing element is encountered in the text and this prefix is also
* the prefix of one of these special things.
*
* Paragraph
* ---------
*
* Grammar:
* - ( '\n' Space* &'\n' )+
*
* AST node:
* Name : Paragraph
* Extends : ContentNode
* Constructor : content
* NodeType : org.sweble.wikitext.lazy.AstNodeTypes.NT_PARAGRAPH
*
*/
module org.sweble.wikitext.lazy.parser.Content;
import org.sweble.wikitext.lazy.utils.ContentUtils;
import org.sweble.wikitext.lazy.utils.Roundtrip;
import org.sweble.wikitext.lazy.utils.XmlReference;
import org.sweble.wikitext.lazy.parser.ExternalLink;
import org.sweble.wikitext.lazy.parser.InternalLink;
import org.sweble.wikitext.lazy.parser.MagicWord;
import org.sweble.wikitext.lazy.parser.ParserEntity;
import org.sweble.wikitext.lazy.parser.Signature;
import org.sweble.wikitext.lazy.parser.Ticks;
import org.sweble.wikitext.lazy.parser.Url;
import org.sweble.wikitext.lazy.parser.HorizontalRule;
import org.sweble.wikitext.lazy.parser.List;
import org.sweble.wikitext.lazy.parser.SemiPre;
import org.sweble.wikitext.lazy.parser.Section;
import org.sweble.wikitext.lazy.parser.Table;
import org.sweble.wikitext.lazy.parser.TableHeader;
import org.sweble.wikitext.lazy.parser.TableCell;
import org.sweble.wikitext.lazy.parser.XmlElement;
import org.sweble.wikitext.lazy.parser.Whitespace;
// -- Block content ------------------------------------------------------------
/* BolBlockContent = ( LineStartProd / Paragraph )*
 *
 * Matches zero or more Block items and wraps all of them in a single
 * NodeList. Succeeds with an empty list when no Block matches.
 */
noinline transient NodeList BolBlockContent =
    blocks:Block*
    {
      yyValue = new NodeList(blocks);
    }
  ;
/* BlockContent = Paragraph ( LineStartProd / Paragraph )*
 *
 * Requires one leading Paragraph, followed by any number of Block items.
 * The paragraph and the trailing items are combined into one NodeList.
 */
noinline transient NodeList BlockContent =
    first:Paragraph rest:Block*
    {
      yyValue = new NodeList(first, rest);
    }
  ;
/* One block-level item. The not-predicate makes the production fail, without
 * consuming input, whenever a BlockStopper matches at the current position.
 * Otherwise LineStartProd is tried first and Paragraph only if that fails.
 */
private inline AstNode Block =
!BlockStopper ( LineStartProd / Paragraph )
;
/* Conditions that end a run of block content. Both alternatives are defined
 * in other modules; this production produces no value and is used inside
 * predicates in this module.
 */
private inline void BlockStopper =
BlockStopperNextSection
/ BlockStopperNextTableElement
;
/* Block-level constructs other than paragraphs. Every alternative first
 * checks a short prefix with an and-predicate, which consumes no input, and
 * then hands over to the named production, which starts again from the same
 * position. Alternatives are tried in the order listed.
 */
private noinline transient AstNode LineStartProd =
&( pExtSpaceStar "{|" ) Table
/ &( pTpStar "---" ) HorizontalRule
/ &( pTpStar [*#:] ) List
/ &( pTpStar [ ] ) SemiPre
/ &( pTpStar [=] ) Sections
;
// -- Paragraph --[ State Aware Memoization ]-----------------------------------
/* Context-aware front end for paragraph parsing. The memoized production
 * ParagraphMemoized supplies a StateAwareResult for this position; the parse
 * result is then looked up in it using the current parser context, and
 * ParagraphTransient is re-run only when no result is stored for that context.
 */
noinline transient AstNode Paragraph =
^{
// The cast relies on ParagraphMemoized always wrapping its result in a
// StateAwareResult.
StateAwareResult r = (StateAwareResult) pParagraphMemoized(yyBase);
final LazyParserContext context = getContext();
// Look for a result already recorded for the current context.
Result yyResult = r.getResult(context);
// None recorded: parse the paragraph again and store the outcome.
if (yyResult == null)
yyResult = r.setResult(context, pParagraphTransient(yyBase));
// NOTE(review): returnTrue() presumably always yields true and only wraps
// the return so that code generated after this action is not flagged as
// unreachable - confirm against its definition.
if (returnTrue(r))
return yyResult;
}
;
/* Memoized companion of Paragraph. Runs ParagraphTransient once and wraps the
 * outcome, together with the current parser context, in a StateAwareResult
 * labelled "Paragraph". Because this production is declared memoized, the
 * wrapper is what gets cached for the position.
 */
noinline memoized AstNode ParagraphMemoized =
^{
Result yyResult = new StateAwareResult("Paragraph", getContext(), pParagraphTransient(yyBase));
// NOTE(review): returnTrue() presumably always yields true; it appears to
// exist only to keep the generated code after this action reachable - confirm.
if (returnTrue(yyResult))
return yyResult;
}
;
// -- Paragraph ----------------------------------------------------------------
/* Paragraph = PreParaWs ParagraphText PostParaWs?
 *           / PreParaWs
 *           / ParagraphText PostParaWs?
 *
 * The actual paragraph parser (not memoized). Alternatives, in order:
 *  1. Leading whitespace lines, then paragraph text that does not start with
 *     a ParagraphStopper, then optional trailing whitespace lines.
 *  2. Leading whitespace lines only; the Whitespace node is the result.
 *  3. Paragraph text with optional trailing whitespace lines.
 * Alternatives 1 and 3 produce a NodeList in which the text and the trailing
 * whitespace sit between intermediate PARAGRAPH open/close tags; leading
 * whitespace is placed before the open tag.
 */
noinline transient AstNode ParagraphTransient =
    leadingWs:PreParaWs !ParagraphStopper text:ParagraphText trailingWs:PostParaWs?
    {
      NodeList nodes = new NodeList();
      nodes.add(leadingWs);
      nodes.add(IntermediateTags.PARAGRAPH.createOpen(false));
      nodes.add(text);
      nodes.add(trailingWs);
      nodes.add(IntermediateTags.PARAGRAPH.createClose(false));
      yyValue = nodes;
    }
  / PreParaWs
  / text:ParagraphText trailingWs:PostParaWs?
    {
      NodeList nodes = new NodeList();
      nodes.add(IntermediateTags.PARAGRAPH.createOpen(false));
      nodes.add(text);
      nodes.add(trailingWs);
      nodes.add(IntermediateTags.PARAGRAPH.createClose(false));
      yyValue = nodes;
    }
  ;
/* Anything that must not be absorbed into a paragraph: a BlockStopper or the
 * start of a LineStartProd construct. Produces no value; used in
 * not-predicates by the paragraph and inline-block productions below.
 */
private inline void ParagraphStopper =
BlockStopper
/ LineStartProd
;
/* PreParaWs = S* EOL ( S* EOL )*
 *
 * Whitespace lines in front of a paragraph: optional spaces plus a line end,
 * followed by any number of further such lines (see PreParaWsMore). The
 * result is a Whitespace node built with its boolean argument set to true.
 */
private noinline transient Whitespace PreParaWs =
    spaces:pExtSpaceStar eol:pEol more:PreParaWsMore*
    {
      Text eolText = new Text(eol);
      NodeList furtherLines = new NodeList(more);
      NodeList children = new NodeList(spaces, eolText, furtherLines);
      yyValue = new Whitespace(children, true);
    }
  ;
/* One additional whitespace line for PreParaWs: optional spaces and a line
 * end. It is only accepted when no ParagraphStopper begins at this position.
 */
private inline NodeList PreParaWsMore =
    !ParagraphStopper spaces:pExtSpaceStar eol:pEol
    {
      Text eolText = new Text(eol);
      yyValue = new NodeList(spaces, eolText);
    }
  ;
/* ParagraphText = Content+ ( EOL Content+ )*
 *
 * The text of a paragraph: one line of inline content followed by any number
 * of continuation lines (see ParagraphTextMore), combined into one NodeList.
 */
private noinline transient AstNode ParagraphText =
    firstLine:InlineContentPlus moreLines:ParagraphTextMore*
    {
      yyValue = new NodeList(firstLine, moreLines);
    }
  ;
/* A continuation line of a paragraph: a line end, then (unless a
 * ParagraphStopper starts the new line) optional spaces and at least one
 * inline content item. The line end is wrapped in its own Whitespace node;
 * the spaces and the content follow it in the resulting NodeList.
 */
private inline AstNode ParagraphTextMore =
    eol:pEol !ParagraphStopper spaces:pExtSpaceStar content:InlineContentPlus
    {
      Whitespace lineBreak = new Whitespace(new NodeList(new Text(eol)), true);
      yyValue = new NodeList(lineBreak, spaces, content);
    }
  ;
/* PostParaWs = EOL ( S* EOL )*
 *
 * Whitespace after a paragraph: the line end that terminates the paragraph
 * text, followed by any number of whitespace-only lines (see PostParaWsMore).
 * The result is a Whitespace node built with its boolean argument set to true.
 */
private noinline transient Whitespace PostParaWs =
    eol:pEol more:PostParaWsMore*
    {
      Text eolText = new Text(eol);
      NodeList furtherLines = new NodeList(more);
      yyValue = new Whitespace(new NodeList(eolText, furtherLines), true);
    }
  ;
/* One additional whitespace line for PostParaWs: optional spaces and a line
 * end. It is only accepted when no ParagraphStopper begins at this position.
 */
private inline NodeList PostParaWsMore =
    !ParagraphStopper spaces:pExtSpaceStar eol:pEol
    {
      Text eolText = new Text(eol);
      yyValue = new NodeList(spaces, eolText);
    }
  ;
// -- For strange things like internal link titles -----------------------------
/* InlineBlockContent = InlineTextBlock? ( LineStartProd / InlineTextBlock )*
 *
 * An optional leading InlineTextBlock (skipped when a BlockStopper matches
 * first), followed by any number of InlineBlock items. Both parts end up in
 * one NodeList, with the repeated items wrapped in a NodeList of their own.
 */
noinline transient NodeList InlineBlockContent =
    first:( !BlockStopper InlineTextBlock )? rest:InlineBlock*
    {
      NodeList restNodes = new NodeList(rest);
      yyValue = new NodeList(first, restNodes);
    }
  ;
/* One item of InlineBlockContent. Fails without consuming input when a
 * BlockStopper matches; otherwise tries LineStartProd first and falls back
 * to InlineTextBlock.
 */
private inline AstNode InlineBlock =
!BlockStopper ( LineStartProd / InlineTextBlock )
;
/* InlineTextBlock = Content+ ( EOL Content* )*
 *                 / ( EOL Content* )+
 *
 * A run of inline text that may span several lines. Either it starts with
 * inline content and continues with any number of InlineTextBlockMore lines,
 * or it consists of one or more InlineTextBlockMore lines alone.
 */
private transient AstNode InlineTextBlock =
    firstLine:InlineContentPlus moreLines:InlineTextBlockMore*
    {
      yyValue = new NodeList(firstLine, moreLines);
    }
  / moreLines:InlineTextBlockMore+
    {
      yyValue = new NodeList(moreLines);
    }
  ;
/* A line end followed by optional inline content. The content is only taken
 * when no ParagraphStopper starts the new line. The line end becomes a plain
 * Text node placed in front of the (possibly absent) content.
 */
private inline AstNode InlineTextBlockMore =
    eol:pEol content:( !ParagraphStopper InlineContentPlus )?
    {
      Text eolText = new Text(eol);
      yyValue = new NodeList(eolText, content);
    }
  ;
// -- Inline Content -----------------------------------------------------------
/* Zero or more inline content items, gathered into one NodeList. */
transient NodeList InlineContentStar =
    items:InlineContentChoice*
    {
      yyValue = new NodeList(items);
    }
  ;
/* One or more inline content items, gathered into one NodeList. */
transient NodeList InlineContentPlus =
    items:InlineContentChoice+
    {
      yyValue = new NodeList(items);
    }
  ;
/* A single piece of inline content.
 *
 * TextPlus stops at ContentAtom prefixes and at ContentStoppers.
 *
 * A ContentAtom production never fails once its prefix is present: if the
 * input is not a real atom, or the atom is not allowed in the current
 * context, the responsible production consumes part of the prefix and
 * returns it as a Text node.
 *
 * As a consequence this production only fails on a ContentStopper, i.e. a
 * syntactic element that belongs to the surrounding production and therefore
 * must not be swallowed as content.
 *
 * Plain URLs are a hack made for performance reasons: they have no "proper
 * prefix" and need special treatment (see the Url grammar). The same goes
 * for internal links and their prefix.
 *
 * Alternatives, in order:
 *  1. text, ':' and a URL path that together form a plain external link
 *  2. text immediately followed by "[[": an internal link with link prefix
 *  3. plain text
 *  4. a content atom
 *  5. a text stopper prefix, consumed as plain text
 */
private transient AstNode InlineContentChoice =
    txt:TextPlus ':' &{ accept(ParserAtoms.PLAIN_EXTERNAL_LINK) } urlPath:UrlPathString &{ isProtocol(txt, urlPath) }
    {
      yyValue = makeExternalLink(txt, urlPath);
    }
  / txt:TextPlus &"[[" !InlineContentStopper lnk:InternalLink
    {
      // InternalLink may hand back something other than a real link node;
      // only a real link gets the preceding text attached as its prefix.
      if (lnk.isNodeType(AstNodeTypes.NT_INTERNAL_LINK))
        yyValue = addLinkPrefix(txt, (InternalLink) lnk);
      else
        yyValue = new NodeList(txt, lnk);
    }
  / TextPlus
  / !InlineContentStopper InlineContentAtom
  / !InlineContentStopper stopper:TextStopperPrefix
    {
      yyValue = new Text(stopper);
    }
  ;
/* The predicates before each ContentAtom are only there to simplify debugging.
 * And sort of to remind us which production is responsible for which prefix.
 *
 * Alternatives are tried in the order listed, so "[[" is offered to
 * InternalLink before the shorter prefix "[" is offered to ExternalLink.
 */
private inline AstNode InlineContentAtom =
&"~~" Signature
/ &"[[" InternalLink
/ &"[" ExternalLink
/ &"''" Ticks
/ &"__" MagicWord
/ &"<" XmlElement
/ &"&" XmlReference
/ &"\uE000" ParserEntity
;
/* Elements at which inline content must stop. Produces no value; it is used
 * in not-predicates by InlineContentChoice. All alternatives are defined in
 * other modules.
 */
private inline void InlineContentStopper =
slEol
/ InlineContentStopperExternalLink
/ InlineContentStopperInternalLink
/ InlineContentStopperHeading
/ InlineContentStopperTableHeader
/ InlineContentStopperTableCell
;
/* Plain text is basically everything that is not a ContentAtom.
 * (Semantically a Text node is a ContentAtom as well, but it is not part of
 * the InlineContentAtom production.) Plain text also has to stop at the
 * so-called TextStoppers: syntactic elements that matter to the surrounding
 * production and must stay visible so that production can consume them.
 *
 * Wraps the string matched by TextPlusStr in a Text node.
 */
private Text TextPlus =
    str:TextPlusStr
    {
      yyValue = new Text(str);
    }
  ;
/* One or more characters, each accepted only if no TextStopperPrefix starts
 * at that character. The value is the matched text.
 */
private transient String TextPlusStr = ( !TextStopperPrefix _ )+ ;
/* Prefixes at which plain text (TextPlusStr) has to stop. The value is the
 * matched prefix text.
 *
 * The choice is ordered: the first alternative that matches wins. ":" used to
 * be listed twice (once as a stop character, once as the plain-URL prefix).
 * The second occurrence could never be reached and has been folded into the
 * first.
 */
private inline String TextStopperPrefix =
  // -- stop characters --
    pSlEol    // block level end (lists, semi pre)
              // heading abort
              // external link title abort
              // table element start
              // table cell/header inline element end
  / "="       // heading end
  / "|"       // internal link parameter end
              // table cell inline element end
  / "!"       // table header inline element end
  / "]"       // external and internal link title end
  / ":"       // Definition list definition (in the pair TERM:DEF)
              // also the content atom prefix of a plain url
  // -- content atom prefix --
  / "~~"      // signature
  / "["       // internal or external link
  / "''"      // bold or italic
  / "__"      // magic word
  / "<"       // xml tag and comment
  / "&"       // xml entity
  / "\uE000"  // parser entity
  ;
// -- End of file --------------------------------------------------------------