All downloads are free. Search and download functionality uses the official Maven repository.

org.unlaxer.jaddress.gremlin.TsvData Maven / Gradle / Ivy

package org.unlaxer.jaddress.gremlin;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.unlaxer.jaddress.entity.standard.階層要素;

/**
 * Splits one line of text into tokens and labels every token with a
 * {@code 階層要素} hierarchy level.
 *
 * <p>Construction runs three steps: the text is tokenized by {@code split},
 * one level per token is chosen by {@code kaisouDetector} from the token's
 * position and shape, and each token/level pair is wrapped in a
 * {@code DataElement}. Token 0 is always labelled {@code ダミーサフィックス}.
 */
public class TsvData {
	Logger logger = LoggerFactory.getLogger(getClass());

	/** A token made up only of ASCII digits. */
	private static final Pattern NUMERIC = Pattern.compile("[0-9]+");

	/**
	 * One of 市, 区, 郡 that is not directly followed by a tab.
	 * NOTE(review): the '|' characters sit inside a character class, so a literal
	 * '|' matches as well; kept as-is to preserve existing tokenization.
	 */
	private static final Pattern CITY_SUFFIX = Pattern.compile("([市|区|郡])(?!\t)");

	/** A digit preceded by a character that is not a digit, '-' or tab. */
	private static final Pattern BEFORE_DIGIT = Pattern.compile("([^0-9-\t])([0-9])");

	/** An upper-case ASCII letter preceded by a character that is not A-Z, '-' or tab. */
	private static final Pattern BEFORE_UPPER = Pattern.compile("([^A-Z-\t])([A-Z])");

	/**
	 * Runs of ASCII space, ideographic space (U+3000) or '|'.
	 * The ideographic space is spelled as an escape so it cannot be confused
	 * with (or silently normalized to) an ASCII space.
	 */
	private static final Pattern SEPARATORS = Pattern.compile("[ |\u3000]+");

	/** One or more consecutive tabs. */
	private static final Pattern TAB_RUN = Pattern.compile("\t+");

	/** Final token delimiter: a tab or a hyphen. */
	private static final Pattern TOKEN_DELIMITER = Pattern.compile("\t|-");

	/** Levels handed out, in this order, to numeric tokens at indexes 3..7. */
	private static final 階層要素[] NUMERIC_LEVELS = { 階層要素.丁目, 階層要素.地番, 階層要素.支号 };

	private final List<DataElement> list;
	private final List<階層要素> kaisouList = new ArrayList<>();

	/**
	 * Tokenizes {@code text} and builds one {@code DataElement} per token.
	 *
	 * @param text the line to parse; must not be {@code null}
	 */
	public TsvData(String text) {
		String[] data = split(text);
		int length = data.length;

		kaisouDetector(data, length);

		list = new ArrayList<>(length);
		for (int i = 0; i < length; i++) {
			// The token index is passed as both the first and the last argument.
			list.add(new DataElement(i, data[i], kaisouList.get(i), i));
		}
	}

	/**
	 * Returns the parsed elements, one per token, in token order.
	 *
	 * @return the internal element list (not a copy)
	 */
	public List<DataElement> getData() {
		return list;
	}

	/** Prints {@code print()} of every element to standard output, one per line. */
	public void debugPrint() {
		list.forEach(e -> System.out.println(e.print()));
	}

	/**
	 * Turns a line into tokens.
	 *
	 * <p>Separator runs become a single tab, a tab is inserted after the first
	 * 市/区/郡 that is not already followed by one, and a tab is inserted in front
	 * of every digit or upper-case ASCII letter that follows a character of a
	 * different kind. The result is then cut on tabs and hyphens.
	 *
	 * @param line the raw line
	 * @return the tokens; trailing empty tokens are dropped by the split
	 */
	private String[] split(String line) {
		String normalized = SEPARATORS.matcher(line).replaceAll("\t");
		normalized = TAB_RUN.matcher(normalized).replaceAll("\t");
		// Only the first match is separated.
		normalized = CITY_SUFFIX.matcher(normalized).replaceFirst("$1\t");
		normalized = BEFORE_DIGIT.matcher(normalized).replaceAll("$1\t$2");
		normalized = BEFORE_UPPER.matcher(normalized).replaceAll("$1\t$2");

		return TOKEN_DELIMITER.split(normalized);
	}

	/**
	 * Fills {@code kaisouList} with one level per token.
	 *
	 * <p>Index 0 always receives {@code ダミーサフィックス}, even when there are no
	 * tokens at all. Exactly one level is appended per remaining token, so the
	 * list index always equals the token index.
	 *
	 * @param data   the tokens
	 * @param length number of tokens to label; callers pass {@code data.length}
	 */
	private void kaisouDetector(String[] data, int length) {
		kaisouList.clear();
		kaisouList.add(階層要素.ダミーサフィックス);
		for (int i = 1; i < length; i++) {
			kaisouList.add(classify(data, i, length));
		}
	}

	/**
	 * Chooses the level for the token at index {@code i}, taking into account
	 * the levels already assigned to earlier tokens.
	 */
	private 階層要素 classify(String[] data, int i, int length) {
		String val = data[i];

		if (i == 1) {
			return 階層要素.都道府県;
		}
		if (i == 2) {
			if (val.endsWith("市")) {
				return 階層要素.市;
			}
			if (val.endsWith("区")) {
				return 階層要素.東京23区;
			}
			if (val.endsWith("郡")) {
				return 階層要素.群;
			}
		}
		if (i == 3 && val.endsWith("区")) {
			return 階層要素.区;
		}

		boolean isNumBlock = NUMERIC.matcher(val).matches();

		if (i >= 3 && i <= 7) {
			if (isNumBlock) {
				// Numeric tokens take the first level that has not been used yet.
				for (階層要素 level : NUMERIC_LEVELS) {
					if (!kaisouList.contains(level)) {
						return level;
					}
				}
			} else {
				// The first non-numeric token directly followed by a numeric one.
				boolean nextIsNumeric = length > i + 1 && NUMERIC.matcher(data[i + 1]).matches();
				if (nextIsNumeric && !kaisouList.contains(階層要素.町または大字)) {
					return 階層要素.町または大字;
				}
			}
		}

		if (i >= 6) {
			// Must be tested before the generic non-numeric case below: a token
			// ending in 棟 is never purely numeric, so testing it afterwards
			// (as the code used to) could never succeed.
			if (val.endsWith("棟")) {
				return 階層要素.棟;
			}
			if (!isNumBlock) {
				return val.endsWith("方") ? 階層要素.方書き : 階層要素.建物;
			}
			if (kaisouList.contains(階層要素.建物)) {
				return 階層要素.部屋番号;
			}
		}

		return 階層要素.ダミーサフィックス;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy