All downloads are free. Search and download functionality uses the official Maven repository.

com.scudata.expression.mfn.string.Words Maven / Gradle / Ivy

Go to download

SPL (Structured Process Language): a programming language designed specifically for structured data computing.

There is a newer version: 20240823
Show newest version
package com.scudata.expression.mfn.string;

import com.scudata.common.MessageManager;
import com.scudata.common.RQException;
import com.scudata.dm.Context;
import com.scudata.dm.Sequence;
import com.scudata.expression.StringFunction;
import com.scudata.resources.EngineMessage;

/**
 * Splits the source string into a sequence of tokens (English words and/or digit runs).
 * s.words()
 * <p>
 * Options, as interpreted by {@link #calculate(Context)}:
 * <ul>
 * <li>(none) - return English words only; a word is a run of ASCII letters that may
 *     contain an apostrophe when the apostrophe is directly followed by a letter</li>
 * <li>i - a word may also contain ASCII digits after its first letter</li>
 * <li>a - return both words and digit runs</li>
 * <li>d - return digit runs only</li>
 * <li>w - return every character: words and digit runs are kept whole, every other
 *     character becomes its own token</li>
 * <li>p - only used together with w: '.', ':', '-' and '/' are treated as digit characters</li>
 * </ul>
 * When several of a, d and w are given, a wins over d, and d wins over w.
 * @author RunQian
 *
 */
public class Words extends StringFunction {
	/**
	 * Splits the source string according to the function option.
	 * @param ctx calculation context (not used by this function)
	 * @return a Sequence of String tokens, or null when the source string is empty
	 * @throws RQException if the function was called with a parameter
	 */
	public Object calculate(Context ctx) {
		// words() takes no parameters
		if (param != null) {
			MessageManager mm = EngineMessage.get();
			throw new RQException("words" + mm.getMessage("function.invalidParam"));
		}
		
		if (srcStr.length() == 0) {
			return null;
		}
		
		boolean iopt = false;
		if (option != null) {
			iopt = option.indexOf('i') != -1;
			
			// Option priority: a, then d, then w
			if (option.indexOf('a') != -1) {
				return splitWordAndDigit(srcStr, iopt);
			} else if (option.indexOf('d') != -1) {
				return splitDigit(srcStr);
			} else if (option.indexOf('w') != -1) {
				return splitAll(srcStr, iopt, option.indexOf('p') != -1);
			}
		}
		
		return splitWords(srcStr, iopt);
	}
	
	/** Returns true if c is an ASCII letter. */
	private static boolean isWord(char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	}
	
	/** Returns true if c is an ASCII digit. */
	private static boolean isDigit(char c) {
		return c >= '0' && c <= '9';
	}
	
	/**
	 * Returns true if c is an ASCII digit or, when popt is set, one of the
	 * characters '.', ':', '-' and '/'.
	 */
	private static boolean isDigit(char c, boolean popt) {
		if (isDigit(c)) {
			return true;
		} else if (popt) {
			return c == '.' || c == ':' || c == '-' || c == '/';
		} else {
			return false;
		}
	}
	
	/**
	 * Finds the end of the word that starts at chars[start].
	 * @param chars characters of the source string
	 * @param start index of the first letter of the word; the caller has already checked it
	 * @param iopt true if digits are allowed inside the word
	 * @return index one past the last character of the word
	 */
	private static int scanWordEnd(char []chars, int start, boolean iopt) {
		int len = chars.length;
		int end = start + 1;
		
		while (end < len) {
			char c = chars[end];
			if (c == '\'') {
				// An apostrophe belongs to the word only when a letter follows it
				if (end + 1 < len && isWord(chars[end + 1])) {
					end += 2;
				} else {
					break;
				}
			} else if (isWord(c) || (iopt && isDigit(c))) {
				++end;
			} else {
				break;
			}
		}
		
		return end;
	}
	
	/**
	 * Finds the end of the digit run that starts at chars[start].
	 * @param chars characters of the source string
	 * @param start index of the first digit character; the caller has already checked it
	 * @param popt true if '.', ':', '-' and '/' count as digit characters
	 * @return index one past the last character of the run
	 */
	private static int scanDigitEnd(char []chars, int start, boolean popt) {
		int len = chars.length;
		int end = start + 1;
		
		while (end < len && isDigit(chars[end], popt)) {
			++end;
		}
		
		return end;
	}
	
	/**
	 * Extracts the English words of the string; everything else is dropped.
	 * @param str source string
	 * @param iopt true if digits are allowed inside a word
	 * @return sequence of words in order of appearance
	 */
	private static Sequence splitWords(String str, boolean iopt) {
		Sequence series = new Sequence();
		char []chars = str.toCharArray();
		int len = chars.length;

		for (int i = 0; i < len;) {
			if (isWord(chars[i])) {
				int end = scanWordEnd(chars, i, iopt);
				series.add(new String(chars, i, end - i));
				// chars[end] never starts a word, so resuming at end is safe
				i = end;
			} else {
				++i;
			}
		}

		return series;
	}

	/**
	 * Extracts the runs of ASCII digits of the string; everything else is dropped.
	 * @param str source string
	 * @return sequence of digit strings in order of appearance
	 */
	private static Sequence splitDigit(String str) {
		Sequence series = new Sequence();
		char []chars = str.toCharArray();
		int len = chars.length;

		for (int i = 0; i < len;) {
			if (isDigit(chars[i])) {
				int end = scanDigitEnd(chars, i, false);
				series.add(new String(chars, i, end - i));
				i = end;
			} else {
				++i;
			}
		}

		return series;
	}

	/**
	 * Extracts both English words and digit runs; everything else is dropped.
	 * @param str source string
	 * @param iopt true if digits are allowed inside a word
	 * @return sequence of words and digit strings in order of appearance
	 */
	private static Sequence splitWordAndDigit(String str, boolean iopt) {
		Sequence series = new Sequence();
		char []chars = str.toCharArray();
		int len = chars.length;

		for (int i = 0; i < len;) {
			int end;
			if (isWord(chars[i])) {
				end = scanWordEnd(chars, i, iopt);
			} else if (isDigit(chars[i])) {
				end = scanDigitEnd(chars, i, false);
			} else {
				++i;
				continue;
			}
			
			series.add(new String(chars, i, end - i));
			// Resume at end, not end + 1: a digit run may directly follow a word
			i = end;
		}

		return series;
	}

	/**
	 * Splits the whole string into tokens without dropping any character: words and
	 * digit runs are kept whole, every other character becomes a token of its own.
	 * A valid surrogate pair is kept together as one token.
	 * @param str source string
	 * @param iopt true if digits are allowed inside a word
	 * @param popt true if '.', ':', '-' and '/' count as digit characters
	 * @return sequence of tokens in order of appearance
	 */
	private static Sequence splitAll(String str, boolean iopt, boolean popt) {
		Sequence series = new Sequence();
		char []chars = str.toCharArray();
		int len = chars.length;

		for (int i = 0; i < len;) {
			char c = chars[i];
			int end;
			if (isWord(c)) {
				end = scanWordEnd(chars, i, iopt);
			} else if (isDigit(c, popt)) {
				end = scanDigitEnd(chars, i, popt);
			} else if (Character.isHighSurrogate(c) && i + 1 < len && Character.isLowSurrogate(chars[i + 1])) {
				// Only a real pair is merged; an unpaired high surrogate must not
				// swallow the character that follows it
				end = i + 2;
			} else {
				end = i + 1;
			}
			
			series.add(new String(chars, i, end - i));
			i = end;
		}

		return series;
	}
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy