All downloads are free. Search and download functionality uses the official Maven repository.

com.jtransc.text.JTranscRegex Maven / Gradle / Ivy

package com.jtransc.text;

import com.jtransc.JTranscSystem;
import com.jtransc.annotation.JTranscMethodBody;
import com.jtransc.annotation.haxe.HaxeAddMembers;
import com.jtransc.annotation.haxe.HaxeMethodBody;

import java.util.ArrayList;

// Small footprint regular expressions used by String, not requiring full Pattern/Matcher
public class JTranscRegex {
	static public final class Pattern implements java.io.Serializable {
		// Flag bit masks accepted by compile(String, int); each constant occupies its own bit so
		// they can be OR-ed together. The names and values match java.util.regex.Pattern.
		// Within this file only CASE_INSENSITIVE (0x02) and MULTILINE (0x08) are tested by
		// Matcher; the other flags are stored and returned by flags() but are not read here.
		public static final int UNIX_LINES = 0x01;
		public static final int CASE_INSENSITIVE = 0x02;
		public static final int COMMENTS = 0x04;
		public static final int MULTILINE = 0x08;
		public static final int LITERAL = 0x10;
		public static final int DOTALL = 0x20;
		public static final int UNICODE_CASE = 0x40;
		public static final int CANON_EQ = 0x80;
		public static final int UNICODE_CHARACTER_CLASS = 0x100;

		/**
		 * Creates a Pattern for {@code regex} with no flags set.
		 * The expression is only stored here; it is not parsed or validated.
		 *
		 * @param regex the regular expression source
		 * @return a new Pattern holding {@code regex} and a flag mask of 0
		 */
		public static Pattern compile(String regex) {
			return compile(regex, 0);
		}

		/**
		 * Creates a Pattern for {@code regex} with the given flag mask.
		 * The expression is only stored here; it is not parsed or validated.
		 *
		 * @param regex the regular expression source
		 * @param flags bitwise OR of the flag constants declared on this class
		 * @return a new Pattern holding both values
		 */
		public static Pattern compile(String regex, int flags) {
			final Pattern compiled = new Pattern(regex, flags);
			return compiled;
		}

		// Regular expression source, exactly as passed to compile().
		private String pattern;
		// Flag mask, exactly as passed to compile().
		private int flags;

		/**
		 * Stores the expression and flag mask as given; instances are obtained through compile().
		 *
		 * @param pattern the regular expression source
		 * @param flags   the flag mask
		 */
		private Pattern(String pattern, int flags) {
			this.flags = flags;
			this.pattern = pattern;
		}

		/**
		 * @return the regular expression source this Pattern was created with
		 */
		public String pattern() {
			return this.pattern;
		}

		/**
		 * @return the flag mask this Pattern was created with
		 */
		public int flags() {
			return this.flags;
		}

		/**
		 * Returns the regular expression source, the same value as {@link #pattern()}.
		 *
		 * @return the regular expression source
		 */
		@Override
		public String toString() {
			return pattern;
		}

		/**
		 * Creates a Matcher that applies this pattern to {@code input}.
		 *
		 * @param input the text to match against
		 * @return a new Matcher bound to this Pattern and {@code input}
		 */
		public Matcher matcher(CharSequence input) {
			final Matcher bound = new Matcher(this, input);
			return bound;
		}

		/**
		 * Convenience shortcut: builds a flag-less Pattern for {@code regex} and returns the
		 * result of {@code matches()} on a Matcher over {@code input}.
		 *
		 * @param regex the regular expression source
		 * @param input the text to test
		 * @return whatever {@link Matcher#matches()} reports for this pattern and input
		 */
		public static boolean matches(String regex, CharSequence input) {
			Pattern compiled = compile(regex);
			Matcher candidate = compiled.matcher(input);
			return candidate.matches();
		}

		/**
		 * Splits {@code input} around matches of this pattern.
		 *
		 * @param input the text to split
		 * @param limit if positive, the result has at most {@code limit} elements and the last one
		 *              holds the unsplit remainder; if zero, trailing empty strings are removed;
		 *              if negative, every segment is kept
		 * @return the segments between matches, or a single-element array containing
		 *         {@code input} when the scan position never advanced past 0
		 */
		public String[] split(CharSequence input, int limit) {
			int index = 0; // start of the segment that has not been emitted yet
			boolean matchLimited = limit > 0;
			// Typed list: with a raw ArrayList, subList(...).toArray(String[]) is typed Object[]
			// and cannot be returned as String[].
			ArrayList<String> matchList = new ArrayList<String>();
			Matcher m = matcher(input);

			while (m.find()) {
				if (!matchLimited || matchList.size() < limit - 1) {
					// A zero-width match at the very beginning yields no empty leading segment.
					if (index == 0 && index == m.start() && m.start() == m.end()) continue;
					matchList.add(input.subSequence(index, m.start()).toString());
					index = m.end();
				} else if (matchList.size() == limit - 1) {
					// Limit reached: the last element is everything that is left, unsplit.
					matchList.add(input.subSequence(index, input.length()).toString());
					index = m.end();
				}
			}

			// The scan position never moved: return the input unchanged.
			if (index == 0) return new String[]{input.toString()};

			// Emit the segment after the last match.
			if (!matchLimited || matchList.size() < limit) {
				matchList.add(input.subSequence(index, input.length()).toString());
			}

			int resultSize = matchList.size();
			if (limit == 0) {
				// limit == 0 discards trailing empty strings.
				while (resultSize > 0 && matchList.get(resultSize - 1).equals("")) resultSize--;
			}
			String[] result = new String[resultSize];
			return matchList.subList(0, resultSize).toArray(result);
		}

		/**
		 * Splits {@code input} around matches of this pattern using a limit of 0,
		 * which makes {@link #split(CharSequence, int)} drop trailing empty strings.
		 *
		 * @param input the text to split
		 * @return the resulting segments
		 */
		public String[] split(CharSequence input) {
			final int noLimit = 0;
			return split(input, noLimit);
		}

		/**
		 * Wraps {@code s} in {@code \Q ... \E} so that it is treated as a literal.
		 * Every {@code \E} already present in {@code s} is emitted as {@code \E\\E\Q}: the quoted
		 * section is closed, a literal {@code \E} is written, and a new quoted section is opened.
		 *
		 * @param s the text to quote
		 * @return the quoted form of {@code s}
		 */
		public static String quote(String s) {
			final String terminator = "\\E";
			if (s.indexOf(terminator) < 0) {
				// Nothing to escape: a single quoted section is enough.
				return "\\Q" + s + "\\E";
			}

			StringBuilder quoted = new StringBuilder(s.length() * 2);
			quoted.append("\\Q");
			int from = 0;
			for (int hit = s.indexOf(terminator, from); hit != -1; hit = s.indexOf(terminator, from)) {
				quoted.append(s.substring(from, hit));
				quoted.append("\\E\\\\E\\Q");
				from = hit + 2; // skip the two characters of the terminator
			}
			quoted.append(s.substring(from, s.length()));
			quoted.append("\\E");
			return quoted.toString();
		}
	}

	// Members added to the generated Haxe class for Matcher:
	//   _pattern, _opts, _text  - native copies of pattern, option letters and input text
	//   _ereg                   - the EReg used for incremental searching
	//   _matches                - result of the most recent match attempt
	//   _offset                 - position at which the next _find() starts searching
	//   _matchPos, _matchLen    - position and length of the last successful match
	// _find() runs matchSub() on the text starting at _offset. On success it records the match
	// position and length and moves _offset to the end of the match; on failure it resets
	// _matchPos and _matchLen to 0. It returns the match result.
	// NOTE(review): for a zero-length match, _offset is set to rpos.pos + 0, so the next
	// _find() appears to start at the same position again - confirm whether callers that loop
	// on find() can get stuck on patterns that match the empty string.
	@HaxeAddMembers({
		"public var _pattern:String;",
		"public var _opts:String;",
		"public var _text:String;",
		"public var _ereg:EReg;",
		"public var _matches:Bool;",
		"public var _offset:Int = 0;",
		"public var _matchPos:Int = 0;",
		"public var _matchLen:Int = 0;",
		"" +
			"public function _find() {\n" +
			"\tvar r = this._ereg;\n" +
			"\tthis._matches = r.matchSub(this{% IFIELD com.jtransc.text.JTranscRegex$Matcher:text:Ljava/lang/String; %}._str, this._offset);\n" +
			"\tif (this._matches) {\n" +
			"\t\tvar rpos = r.matchedPos();\n" +
			"\t\tthis._matchPos = rpos.pos;\n" +
			"\t\tthis._matchLen = rpos.len;\n" +
			"\t\tthis._offset = rpos.pos + rpos.len;\n" +
			"\t} else {\n" +
			"\t\tthis._matchPos = 0;\n" +
			"\t\tthis._matchLen = 0;\n" +
			"\t}\n" +
			"\treturn this._matches;\n" +
			"}"
	})
	static public final class Matcher {
		// Several of these fields are read by the Haxe/JS method bodies in this class (through
		// IFIELD templates or as this._<name>), so they must keep their names even where the
		// Java code never reads them.
		private Pattern parent;       // the Pattern that created this Matcher
		private String pattern;       // regular expression source copied from parent
		private int flags;            // flag mask copied from parent
		private String flagsString;   // option letters derived from flags: "i" and/or "m"
		private String text;          // the complete input
		private String subtext;       // the part of the input that _find() searches
		private int matchStart = 0;   // returned by start()
		private int matchEnd = 0;     // returned by end()
		// The JS bodies keep capture groups in a dynamic this._groups property, not in a Java field.

		/**
		 * Binds {@code parent} to {@code text}, derives the option letters from the flag mask and
		 * then runs the target-specific {@code _init()}.
		 *
		 * @param parent the Pattern supplying the expression and flags
		 * @param text   the input to search; converted with {@code toString()}
		 */
		Matcher(Pattern parent, CharSequence text) {
			this.parent = parent;
			this.pattern = parent.pattern();
			this.flags = parent.flags();
			this.text = text.toString();
			this.subtext = this.text;

			String options = "";
			if ((this.flags & Pattern.CASE_INSENSITIVE) != 0) options += "i";
			if ((this.flags & Pattern.MULTILINE) != 0) options += "m";
			this.flagsString = options;

			// Must run last: the target-specific bodies read the fields assigned above.
			_init();
		}

		/**
		 * Target-specific set-up, called at the end of the constructor. The Java body is empty;
		 * the work is done by the Haxe and JS bodies declared in the annotations.
		 *
		 * Haxe: builds the option string from the flags (0x02 adds 'i', 0x08 adds 'm'), stores
		 * native copies of the pattern, options and text, creates the EReg kept in _ereg, and
		 * evaluates a whole-input match into _matches.
		 * JS: creates a RegExp with the 'g' option in _ereg and stores the option string in _opts.
		 *
		 * The Haxe whole-input check wraps the pattern in a non-capturing group before adding the
		 * anchors, so a top-level alternation such as a|b becomes ^(?:a|b)$ instead of ^a|b$
		 * (which would anchor each alternative on one side only). Group numbering is unaffected.
		 */
		@HaxeMethodBody("" +
			"var opts = '';\n" +
			"var flags = this{% IFIELD com.jtransc.text.JTranscRegex$Matcher:flags %};\n" +
			"var pattern = this{% IFIELD com.jtransc.text.JTranscRegex$Matcher:pattern %};\n" +
			"var text = this{% IFIELD com.jtransc.text.JTranscRegex$Matcher:text %};\n" +
			"if ((flags & 0x02) != 0) opts += 'i';\n" +
			"if ((flags & 0x08) != 0) opts += 'm';\n" +
			//"if ((this.flags & 0x20) != 0) opts += 's';\n" + // dotall default on javascript
			"this._pattern = pattern._str;\n" +
			"this._opts = opts;\n" +
			"this._text = text._str;\n" +
			"this._ereg = new EReg(pattern._str, opts);\n" +
			"this._matches = (new EReg('^(?:' + pattern._str + ')$', opts)).match(text._str);"
		)
		@JTranscMethodBody(target = "js", value = {
			"this._ereg = new RegExp(N.istr(this._pattern), N.istr(this._flagsString + 'g'));",
			"var flags = this{% IFIELD com.jtransc.text.JTranscRegex$Matcher:flags %};",
			"var opts = '';",
			"if ((flags & 0x02) != 0) opts += 'i';",
			"if ((flags & 0x08) != 0) opts += 'm';",
			"this._opts = opts;",
		})
		private void _init() {
		}

		/**
		 * @return the Pattern that created this Matcher
		 */
		public Pattern pattern() {
			return parent;
		}

		/**
		 * Returns the start position recorded by the last find.
		 * The Haxe body returns {@code _matchPos}; elsewhere the {@code matchStart} field is returned.
		 *
		 * @return the recorded start position
		 */
		@HaxeMethodBody("return this._matchPos;")
		public int start() {
			return matchStart;
		}

		/**
		 * Returns the start position of the given group. Only group 0 (the whole match) is
		 * supported; any other group triggers the debugger hook and throws.
		 *
		 * @param group the group number; must be 0
		 * @return the same value as {@link #start()}
		 * @throws Error if {@code group} is not 0
		 */
		public int start(int group) {
			if (group != 0) {
				JTranscSystem.debugger();
				throw new Error("No implemented Matcher.start(int group) with group != 0");
			}
			return start();
		}

		/**
		 * Returns the end position recorded by the last find.
		 * The Haxe body returns {@code _matchPos + _matchLen}; elsewhere the {@code matchEnd}
		 * field is returned.
		 *
		 * @return the recorded end position
		 */
		@HaxeMethodBody("return this._matchPos + this._matchLen;")
		public int end() {
			return matchEnd;
		}

		/**
		 * Returns the end position of the given group. Only group 0 (the whole match) is
		 * supported; any other group triggers the debugger hook and throws.
		 *
		 * @param group the group number; must be 0
		 * @return the same value as {@link #end()}
		 * @throws Error if {@code group} is not 0
		 */
		public int end(int group) {
			if (group != 0) {
				JTranscSystem.debugger();
				throw new Error("No implemented Matcher.end(int group) with group != 0");
			}
			return end();
		}

		/**
		 * Returns the text of the whole match (group 0).
		 * The Haxe body reads {@code matched(0)} from the EReg; elsewhere this delegates to
		 * {@link #group(int)}.
		 *
		 * @return the text of group 0
		 */
		@HaxeMethodBody("return N.str(this._ereg.matched(0));")
		public String group() {
			final int wholeMatch = 0;
			return group(wholeMatch);
		}

		/**
		 * Returns the text captured by the given group of the last match.
		 * There is no Java implementation: the Haxe body reads {@code matched(p0)} from the EReg,
		 * and the JS body reads entry {@code p0} of {@code this._groups}, which is assigned by the
		 * JS body of {@code _find()}.
		 *
		 * @param group the group number; 0 is the whole match
		 * @return the captured text converted with {@code N.str}
		 */
		@HaxeMethodBody("return N.str(this._ereg.matched(p0));")
		@JTranscMethodBody(target = "js", value = "return N.str(this._groups[p0]);")
		native public String group(int group);

		/**
		 * Reports whether the pattern matches the input.
		 * There is no Java implementation.
		 *
		 * Haxe: returns the {@code _matches} member, which is assigned during initialisation and
		 * reassigned by every {@code _find()} call.
		 * JS: tests the full text against the pattern anchored with {@code ^} and {@code $}.
		 * The options come from {@code this._opts}, because no local {@code opts} exists in this
		 * method body. The pattern is wrapped in a non-capturing group so that a top-level
		 * alternation such as a|b is anchored as ^(?:a|b)$ rather than ^a|b$.
		 *
		 * @return the match result as described above
		 */
		@HaxeMethodBody("return this._matches;")
		@JTranscMethodBody(target = "js", value = "return (new RegExp('^(?:' + N.istr(this._pattern) + ')$', this._opts)).test(N.istr(this._text));")
		native public boolean matches();

		/**
		 * Searches for the next match by delegating to {@code _find()}.
		 *
		 * @return the result of {@code _find()}
		 */
		@HaxeMethodBody("return _find();")
		public boolean find() {
			final boolean found = _find();
			return found;
		}

		/**
		 * Searches for a match beginning at {@code start}.
		 * The Haxe body sets {@code _offset} to {@code start} and searches the full text; the
		 * Java body replaces {@code subtext} with the tail of the text and searches that.
		 * NOTE(review): on the non-Haxe path the search runs on the tail, so the positions
		 * recorded by {@code _find()} look relative to {@code start}, not to the full text -
		 * confirm.
		 *
		 * @param start index in the input at which to begin searching
		 * @return the result of {@code _find()}
		 */
		@HaxeMethodBody("this._offset = p0; return _find();")
		public boolean find(int start) {
			final String tail = this.text.substring(start);
			this.subtext = tail;
			return _find();
		}

		/**
		 * Performs one search step. There is no Java implementation.
		 *
		 * JS: runs {@code exec} of {@code _ereg} on {@code _subtext} and keeps the result in
		 * {@code this._groups}. {@code _matchStart} becomes the result's {@code index}, or -1
		 * when there is no result; {@code _matchEnd} becomes the RegExp's {@code lastIndex}.
		 * No Haxe body is declared on this method; presumably the Haxe target uses the
		 * {@code _find()} function injected through {@code @HaxeAddMembers} - confirm.
		 *
		 * @return true when {@code exec} produced a result
		 */
		@JTranscMethodBody(target = "js", value = {
			"this._groups = this._ereg.exec(N.istr(this._subtext));",
			"this._matchStart = (this._groups) ? this._groups.index : -1;",
			"this._matchEnd   = this._ereg.lastIndex;",
			"return this._groups != null;",
		})
		native private boolean _find();

		/**
		 * Replaces every match in the input with {@code replacement}.
		 *
		 * @param replacement the replacement text, passed through to {@code replaceFirstAll}
		 * @return the text after replacement
		 */
		public String replaceAll(String replacement) {
			final boolean everyMatch = true;
			return replaceFirstAll(replacement, everyMatch);
		}

		/**
		 * Replaces only the first match in the input with {@code replacement}.
		 *
		 * @param replacement the replacement text, passed through to {@code replaceFirstAll}
		 * @return the text after replacement
		 */
		public String replaceFirst(String replacement) {
			final boolean everyMatch = false;
			return replaceFirstAll(replacement, everyMatch);
		}

		/**
		 * Shared implementation of replaceFirst() and replaceAll(). There is no Java
		 * implementation.
		 *
		 * Both target bodies build a new regular expression from the pattern and the stored
		 * options, appending 'g' to the options when {@code all} is true, and call the target's
		 * own replace on the full text.
		 * NOTE(review): the replacement string is handed to the target's replace unchanged, so
		 * any special sequences in it are presumably interpreted by the target's rules - confirm.
		 *
		 * @param replacement the replacement text
		 * @param all         true to replace every match, false to replace only the first
		 * @return the text after replacement, converted with {@code N.str}
		 */
		@HaxeMethodBody("return N.str(new EReg(this._pattern, p1 ? (this._opts + 'g') : (this._opts)).replace(this._text, p0._str));")
		@JTranscMethodBody(target = "js", value = {
			"var opts = p1 ? (this._opts + 'g') : (this._opts);",
			"var text = N.istr(this{% IFIELD com.jtransc.text.JTranscRegex$Matcher:text %});",
			"var pattern = N.istr(this{% IFIELD com.jtransc.text.JTranscRegex$Matcher:pattern %});",
			"return N.str(text.replace(new RegExp(pattern, opts), N.istr(p0)));"
		})
		native private String replaceFirstAll(String replacement, boolean all);

	}
	//public static final int UNIX_LINES = 0x01;
	//public static final int CASE_INSENSITIVE = 0x02;
	//public static final int COMMENTS = 0x04;
	//public static final int MULTILINE = 0x08;
	//public static final int LITERAL = 0x10;
	//public static final int DOTALL = 0x20;
	//public static final int UNICODE_CASE = 0x40;
	//public static final int CANON_EQ = 0x80;
	//public static final int UNICODE_CHARACTER_CLASS = 0x100;
	// Haxe EReg option letters, for reference:
	//   i  case-insensitive matching
	//   g  global replace or split
	//   m  multiline matching: ^ and $ match at the beginning and end of each line
	//   s  the dot (.) also matches newlines (Neko, C++, PHP, Flash and Java targets only)
	//   u  use UTF-8 matching (Neko and C++ targets only)
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy