All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.sweble.wikitext.lazy.encval.EncodingValidatorLexer.jflex Maven / Gradle / Ivy

There is a newer version: 3.1.9
Show newest version
/**
 * Copyright 2011 The Open Source Research Group,
 *                University of Erlangen-Nürnberg
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sweble.wikitext.lazy.encval;

import de.fau.cs.osr.ptk.common.EntityMap;
import de.fau.cs.osr.ptk.common.ast.Location;


%% /**************************************************************** Options **/


/* Return type of the generated scanning method. None of the rule actions in
 * this specification contains a return statement; the result of a run is
 * collected in a buffer and fetched with getWikitext() instead.
 */
%type Object
/* Name of the generated scanner class. */
%class EncodingValidatorLexer

/* Generate the scanner as a public class. */
%public
/* Scan the full Unicode character set. The macros below spell out surrogates
 * as individual 16-bit units.
 * NOTE(review): whether the scanner works on UTF-16 units or on code points
 * depends on the JFlex version -- confirm before upgrading JFlex.
 */
%unicode
/* Maintain yyline and yycolumn; the rule actions pass them on as the position
 * of each illegal code point.
 */
%line
%column


/***************************************************************** Java Code **/


%{
	/** Name handed to the Location of every reported code point; see setFile(). */
	private String file;
	
	/** Receives the reported code points; stays null until setEntityMap() is called. */
	private EntityMap entityMap;
	
	/** Collects the scanner output that getWikitext() returns. */
	private StringBuilder text = new StringBuilder();
	
	/**
	 * Stores the name that is attached to the location of every illegal code
	 * point reported afterwards.
	 *
	 * @param file
	 *            the name to store; it is not validated here.
	 */
	public void setFile(String file)
	{
		this.file = file;
	}
	
	/**
	 * Returns the name most recently passed to setFile().
	 *
	 * @return the stored name, or null if none has been set.
	 */
	public String getFile()
	{
		return this.file;
	}
	
	/**
	 * Sets the entity map in which illegal code points are registered while
	 * scanning.
	 *
	 * @param entityMap
	 *            the map to register illegal code points with.
	 */
	public void setEntityMap(EntityMap entityMap)
	{
		this.entityMap = entityMap;
	}
	
	/**
	 * Returns the entity map most recently passed to setEntityMap().
	 *
	 * @return the entity map, or null if none has been set.
	 */
	public EntityMap getEntityMap()
	{
		return this.entityMap;
	}
	
	/**
	 * Registers an illegal code point with the entity map and appends a
	 * placeholder for it to the output in place of the offending text.
	 * 
	 * The placeholder consists of the character U+E000, the decimal id
	 * returned by the entity map, and the character U+E001.
	 * 
	 * @param line
	 *            line of the code point as reported by the scanner (yyline).
	 * @param column
	 *            column of the code point as reported by the scanner
	 *            (yycolumn, adjusted by the caller where necessary).
	 * @param codePoint
	 *            the offending text exactly as it was matched.
	 * @param type
	 *            the kind of violation.
	 * @throws IllegalStateException
	 *             if no entity map has been set. Without this check the
	 *             failure would surface as a NullPointerException without
	 *             any hint at the cause.
	 */
	private void wrapIllegalCodePoint(int line, int column, String codePoint, IllegalCodePointType type)
	{
		if (entityMap == null)
			throw new IllegalStateException(
					"No EntityMap set: call setEntityMap() before scanning");
		
		IllegalCodePoint p = new IllegalCodePoint(codePoint, type);
		p.setNativeLocation(new Location(file, line, column));
		
		int id = entityMap.registerEntity(p);
		
		// Placeholder: opening delimiter, entity id, closing delimiter.
		text.append('\uE000').append(id).append('\uE001');
	}
	
	/**
	 * Returns everything the scanner has written to its output buffer so far:
	 * the text that was copied through unchanged plus one placeholder for each
	 * illegal code point that was reported.
	 *
	 * @return a snapshot of the output buffer; never null.
	 */
	public String getWikitext()
	{
		final String result = text.toString();
		return result;
	}
%}


/******************************************************************** Macros **/


/* Two-character sequences that contain an isolated surrogate.
 *
 * IC_HIGH_NO_LOW: a high surrogate (U+D800..U+DBFF) followed by any character
 *                 that is not a low surrogate (U+DC00..U+DFFF).
 * IC_NO_HIGH_LOW: a low surrogate (U+DC00..U+DFFF) preceded by any character
 *                 that is not a high surrogate (U+D800..U+DBFF).
 *
 * Both macros match the neighbouring character as well, not just the
 * surrogate itself.
 */
IC_HIGH_NO_LOW      = [\uD800-\uDBFF][\u0000-\uDBFF\uE000-\uFFFF]
IC_NO_HIGH_LOW      = [\u0000-\uD7FF\uDC00-\uFFFF][\uDC00-\uDFFF]

/* Unicode non-characters.
 *
 * NC_00FDD0_to_00FDEF: the contiguous range U+FDD0..U+FDEF.
 * NC_00FFFE_or_00FFFF: the last two code points of plane 0.
 * NC_xxFFFE_or_xxFFFF: the last two code points of planes 1 to 16
 *                      (U+1FFFE, U+1FFFF, ..., U+10FFFE, U+10FFFF), written
 *                      as surrogate pairs: one high surrogate per plane,
 *                      followed by the low surrogate U+DFFE or U+DFFF.
 */
NC_00FDD0_to_00FDEF = [\uFDD0-\uFDEF]
NC_00FFFE_or_00FFFF = [\uFFFE\uFFFF]
NC_xxFFFE_or_xxFFFF = [\uD83F\uD87F\uD8BF\uD8FF\uD93F\uD97F\uD9BF\uD9FF\uDA3F\uDA7F\uDABF\uDAFF\uDB3F\uDB7F\uDBBF\uDBFF][\uDFFE\uDFFF]
NON_CHAR            = {NC_00FDD0_to_00FDEF}|{NC_00FFFE_or_00FFFF}|{NC_xxFFFE_or_xxFFFF}

/* Private-use code points.
 *
 * PU_00E000_to_00F8FF: the private-use area of plane 0.
 * PU_0F0000_to_0FFFFD: plane 15, written as surrogate pairs.
 * PU_100000_to_10FFFD: plane 16, written as surrogate pairs.
 *
 * The last two code points of planes 15 and 16 are left out on purpose (the
 * low surrogate stops at U+DFFD for the last high surrogate of each plane);
 * they are non-characters and are covered by NON_CHAR.
 */
PU_00E000_to_00F8FF = [\uE000-\uF8FF]
PU_0F0000_to_0FFFFD = [\uDB80-\uDBBE][\uDC00-\uDFFF]|\uDBBF[\uDC00-\uDFFD]
PU_100000_to_10FFFD = [\uDBC0-\uDBFE][\uDC00-\uDFFF]|\uDBFF[\uDC00-\uDFFD]
PRIVATE_USE_CHAR    = {PU_00E000_to_00F8FF}|{PU_0F0000_to_0FFFFD}|{PU_100000_to_10FFFD}

/* C0 control characters U+0000..U+001F except TAB (U+0009), LF (U+000A) and
 * CR (U+000D), plus DEL (U+007F).
 *
 * The upper bound of the last C0 range is hexadecimal 1F (decimal 31). It
 * used to be written as 0019, which let U+001A..U+001F (SUB, ESC, FS, GS, RS
 * and US) pass as ordinary text.
 */
CONTROL_CHAR        = [\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]


%% /****************************************************************** Rules **/


/* Rule order matters in this section: the scanner prefers the longest match
 * and, among matches of equal length, the rule that is listed first.
 *
 * Every rule consumes exactly one code point (one character, or one
 * surrogate pair), so no character is ever swallowed unvalidated as the
 * neighbour of another one.
 */


/* Non-character
 *
 * The two-character alternatives (last two code points of planes 1 to 16) are
 * well-formed surrogate pairs and therefore have to be listed before the
 * surrogate pair rule further down.
 */
{NON_CHAR}            {
                        wrapIllegalCodePoint(
                            yyline,
                            yycolumn,
                            yytext(),
                            IllegalCodePointType.NON_CHARACTER);
                      }


/* Private-use character
 *
 * As above, the alternatives for planes 15 and 16 are well-formed surrogate
 * pairs and have to be listed before the surrogate pair rule.
 */
{PRIVATE_USE_CHAR}    {
                        wrapIllegalCodePoint(
                            yyline,
                            yycolumn,
                            yytext(),
                            IllegalCodePointType.PRIVATE_USE_CHARACTER);
                      }


/* Control character
 */
{CONTROL_CHAR}        {
                        wrapIllegalCodePoint(
                            yyline,
                            yycolumn,
                            yytext(),
                            IllegalCodePointType.CONTROL_CHARACTER);
                      }


/* Well-formed surrogate pair (high surrogate followed by low surrogate) that
 * is neither a non-character nor a private-use character: copied unchanged.
 * Matching the pair as a unit guarantees that a surrogate which reaches the
 * next rule really is isolated.
 */
[\uD800-\uDBFF][\uDC00-\uDFFF] {
                        text.append(yytext());
                      }


/* Isolated surrogates
 *
 * Any surrogate that was not consumed as part of a pair by one of the rules
 * above: a high surrogate without a following low surrogate (including one
 * at the very end of the input) or a low surrogate without a preceding high
 * surrogate (including one at the very start of the input).
 *
 * This rule replaces the two rules based on the macros IC_HIGH_NO_LOW and
 * IC_NO_HIGH_LOW. Those also consumed the character next to the surrogate
 * and copied it to the output without validating it.
 */
[\uD800-\uDFFF]       {
                        wrapIllegalCodePoint(
                            yyline,
                            yycolumn,
                            yytext(),
                            IllegalCodePointType.ISOLATED_SURROGATE);
                      }


/* Everything else
 */
.                     |
\n                    {
                        text.append(yytext());
                      }


/************************************************************** End of file. **/




© 2015 - 2024 Weber Informatics LLC | Privacy Policy