Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
io.github.gitbucket.markedj.Lexer Maven / Gradle / Ivy
package io.github.gitbucket.markedj;
import io.github.gitbucket.markedj.rule.Rule;
import io.github.gitbucket.markedj.token.*;
import java.util.*;
import static io.github.gitbucket.markedj.Utils.*;
public class Lexer {
protected Options options;
protected Map rules = null;
public Lexer(Options options){
this.options = options;
if(!options.isGfm()){
this.rules = Grammer.BLOCK_RULES;
} else if(options.isTables()){
this.rules = Grammer.BLOCK_TABLE_RULES;
} else {
this.rules = Grammer.BLOCK_GFM_RULES;
}
}
public LexerResult lex(String src){
LexerContext context = new LexerContext();
token(src
.replace("\r\n", "\n")
.replace("\r", "\n")
.replace("\t", " ")
.replace("\u00a0", " ")
.replace("\u2424", "\n"),
true, false, context);
return new LexerResult(context.getTokens(), context.getLinks(), context.getFootnotes());
}
protected void token(String src, boolean top, boolean bq, LexerContext context){
while(src.length() > 0){
// newline
{
List cap = rules.get("newline").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
if(cap.get(0).length() > 1){
context.pushToken(new SpaceToken());
}
}
}
// code
{
List cap = rules.get("code").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
String code = cap.get(0).replaceAll("(?m)^ {4}", "");
if(!options.isPedantic()){
context.pushToken(new CodeToken(code.replaceAll("\\n+$", ""), null, false));
} else {
context.pushToken(new CodeToken(code, null, false));
}
continue;
}
}
// fences (gfm)
{
List cap = rules.get("fences").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
context.pushToken(new CodeToken(cap.get(3), cap.get(2), false));
continue;
}
}
// heading
{
List cap = rules.get("heading").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
context.pushToken(new HeadingToken(cap.get(1).length(), cap.get(2)));
continue;
}
}
// footnote
if (top) {
List cap = rules.get("footnote").exec(src);
if (!cap.isEmpty()) {
src = src.substring(cap.get(0).length());
String key = cap.get(1).toLowerCase();
context.pushFootnotes(key, cap.get(2));
continue;
}
}
// table no leading pipe (gfm)
if(top){
List cap = rules.get("nptable").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
String[] headers = cap.get(1).replaceAll("^ *| *\\| *$", "").split(" *\\| *");
String[] aligns = cap.get(2).replaceAll("^ *|\\| *$", "").split(" *\\| *");
String[] rows = cap.get(3).replaceAll("\n$", "").split("\n");
List headerList = array2list(headers);
List alignList = new ArrayList<>();
for (String s : aligns) {
if(s.matches("^ *-+: *$")){
alignList.add("right");
} else if(s.matches("^ *:-+: *$")){
alignList.add("center");
} else if(s.matches("^ *:-+ *$")){
alignList.add("left");
} else {
alignList.add(null);
}
}
int maxColumns = Math.max(headers.length, aligns.length);
List> rowList = new ArrayList<>();
for (String row : rows) {
String[] columns = row.split(" *\\| *");
if(maxColumns < columns.length){
maxColumns = columns.length;
}
rowList.add(array2list(columns));
}
fillList(headerList, maxColumns, "");
fillList(alignList, maxColumns, null);
for(List row: rowList){
fillList(row, maxColumns, "");
}
context.pushToken(new TableToken(headerList, alignList, rowList));
continue;
}
}
// lheading
{
List cap = rules.get("lheading").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
if(cap.get(2).equals("=")){
context.pushToken(new HeadingToken(1, cap.get(1)));
} else {
context.pushToken(new HeadingToken(2, cap.get(1)));
}
continue;
}
}
// hr
{
List cap = rules.get("hr").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
context.pushToken(new HrToken());
continue;
}
}
// blockquote
{
List cap = rules.get("blockquote").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
context.pushToken(new BlockquoteStartToken());
token(cap.get(0).replaceAll("(?m) *> ?", ""), top, true, context);
context.pushToken(new BlockquoteEndToken());
continue;
}
}
// list
{
List cap = rules.get("list").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
String bull = cap.get(2);
context.pushToken(new ListStartToken(bull.matches("^[0-9]+\\.$")));
boolean next = false;
// Get each top-level item.
cap = rules.get("item").exec(cap.get(0));
if(!cap.isEmpty()){
for(int i = 0; i < cap.size(); i++){
String item = cap.get(i);
// Remove the list item's bullet
// so it is seen as the nextToken token.
int space = item.length();
item = item.replaceAll("^ *([*+-]|\\d+\\.) +", "");
// Outdent whatever the
// list item contains. Hacky.
if(item.indexOf("\n ") > 0){
space = space - item.length();
if(!options.isPedantic()){
item = item.replaceAll("(?m)^ {1," + space + "}", "");
} else {
item = item.replaceAll("(?m)^ {1,4}", "");
}
}
// // Determine whether the nextToken list item belongs here.
// // Backpedal if it does not belong in this list.
// if(options.isSmartLists() && i != cap.size() - 1){
// Pattern p = Pattern.compile(Grammer.BULLET);
// if(p.matcher(cap.get(i + 1)).find()){
// src = String.join("\n", cap.subList(i + 1, cap.size())) + src;
// i = i - 1;
// }
// }
// Determine whether item is loose or not.
// Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
// for discount behavior.
boolean loose = next || item.matches("\\n\\n(?!\\s*$)");
if(i != cap.size() - 1){
next = !item.isEmpty() && item.charAt(item.length() - 1) == '\n';
if(!loose) {
loose = next;
}
}
if(loose){
context.pushToken(new LooseItemStartToken());
} else {
context.pushToken(new ListItemStartToken());
}
token(item, false, bq, context);
context.pushToken(new ListItemEndToken());
}
}
context.pushToken(new ListEndToken());
continue;
}
}
// html
{
List cap = rules.get("html").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
if(options.isSanitize()){
context.pushToken(new ParagraphToken(cap.get(0)));
} else {
context.pushToken(new HtmlToken(cap.get(0),
!options.isSanitize() && (cap.get(0).equals("pre") || cap.get(0).equals("script") || cap.get(0).equals("style"))));
}
continue;
}
}
// def
if(!bq && top){
List cap = rules.get("def").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
context.defineLink(cap.get(1).toLowerCase(), new Link(cap.get(2), cap.get(3)));
continue;
}
}
// table (gfm)
if(top){
List cap = rules.get("table").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
String[] headers = cap.get(1).replaceAll("^ *| *\\| *$", "").split(" *\\| *");
String[] aligns = cap.get(2).replaceAll("^ *|\\| *$", "").split(" *\\| *");
String[] rows = cap.get(3).replaceAll("(?: *\\| *)?\\n$", "").split("\\n");
List headerList = array2list(headers);
List alignList = new ArrayList<>();
for (String s : aligns) {
if(s.matches("^ *-+: *$")){
alignList.add("right");
} else if(s.matches("^ *:-+: *$")){
alignList.add("center");
} else if(s.matches("^ *:-+ *$")){
alignList.add("left");
} else {
alignList.add(null);
}
}
int maxColumns = Math.max(headers.length, aligns.length);
List> rowList = new ArrayList<>();
for (String row : rows) {
String[] columns = row.replaceAll("^ *\\| *| *\\| *$", "").split(" *\\| *");
if(maxColumns < columns.length){
maxColumns = columns.length;
}
rowList.add(array2list(columns));
}
fillList(headerList, maxColumns, "");
fillList(alignList, maxColumns, null);
for(List row: rowList){
fillList(row, maxColumns, "");
}
context.pushToken(new TableToken(headerList, alignList, rowList));
continue;
}
}
// top-level paragraph
if(top){
List cap = rules.get("paragraph").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
if(cap.get(1).charAt(cap.get(1).length() - 1) == '\n'){
context.pushToken(new ParagraphToken(cap.get(1).substring(0, cap.get(1).length() - 1)));
} else {
context.pushToken(new ParagraphToken(cap.get(1)));
}
continue;
}
}
// text
{
List cap = rules.get("text").exec(src);
if(!cap.isEmpty()){
src = src.substring(cap.get(0).length());
context.pushToken(new TextToken((cap.get(0))));
continue;
}
}
// TODO Error
//println("Infinite loop on byte: " + source.charAt(0).toByte)
}
}
public static class LexerContext {
private Stack tokens = new Stack<>();
private Map links = new HashMap<>();
private Map footnotes = new HashMap<>();
public void pushToken(Token token){
this.tokens.push(token);
}
public void defineLink(String key, Link link){
this.links.put(key, link);
}
public void pushFootnotes(String key, String text) {
this.footnotes.put(key, text);
}
public Stack getTokens() {
return tokens;
}
public Map getLinks() {
return links;
}
public Map getFootnotes() {
return footnotes;
}
}
public static class LexerResult {
private Stack tokens;
private Map links = new HashMap<>();
private Map footnotes = new HashMap<>();
public LexerResult(Stack tokens, Map links, Map footnotes){
this.tokens = tokens;
this.links = links;
this.footnotes = footnotes;
}
public Stack getTokens() {
return tokens;
}
public Map getLinks() {
return links;
}
public Map getFootnotes() {
return footnotes;
}
}
public static class Link {
private String href;
private String title;
public Link(String href, String title){
this.href = href;
this.title = title;
}
public String getHref() {
return href;
}
public String getTitle() {
return title;
}
}
}