// org.unlaxer.jaddress.gremlin.TsvData Maven / Gradle / Ivy
package org.unlaxer.jaddress.gremlin;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.unlaxer.jaddress.entity.standard.階層要素;
/**
 * Splits one line of address text into tokens and labels each token with a
 * {@link 階層要素} (address hierarchy level) using position- and suffix-based
 * heuristics.
 *
 * <p>Token 0 is always labelled {@code ダミーサフィックス} and token 1 is always
 * labelled {@code 都道府県}; later tokens are classified by {@link #classify}.
 */
public class TsvData {

  private static final Logger logger = LoggerFactory.getLogger(TsvData.class);

  /** A token made only of ASCII digits. */
  private static final Pattern NUMERIC_BLOCK = Pattern.compile("[0-9]+");

  /**
   * A 市 / 区 / 郡 character that is not already followed by a tab. The '|' inside
   * the character class is a literal pipe, not alternation; it never matches in
   * practice because {@link #split} turns pipes into tabs first.
   */
  private static final Pattern MUNICIPALITY_SUFFIX = Pattern.compile("([市|区|郡])(?!\t)");

  /** Boundary between a character that is not a digit, '-' or tab, and a digit. */
  private static final Pattern DIGIT_BOUNDARY = Pattern.compile("([^0-9-\t])([0-9])");

  /** Boundary between a character that is not A-Z, '-' or tab, and an A-Z letter. */
  private static final Pattern UPPERCASE_BOUNDARY = Pattern.compile("([^A-Z-\t])([A-Z])");

  /** One element per token, in token order. */
  private List<DataElement> list;

  /** Hierarchy label per token; index-aligned with the token array. */
  private List<階層要素> kaisouList;

  /**
   * Tokenizes {@code text} and builds one {@code DataElement} per token.
   *
   * @param text the raw address line; must not be {@code null}
   */
  public TsvData(String text) {
    String[] data = split(text);
    int length = data.length;
    kaisouDetector(data, length);
    list = new ArrayList<>(length);
    for (int i = 0; i < length; i++) {
      list.add(new DataElement(i, data[i], kaisouList.get(i), i));
    }
  }

  /**
   * Returns the parsed elements in token order.
   *
   * @return the internal list itself (not a copy)
   */
  public List<DataElement> getData() {
    return list;
  }

  /** Prints every element's {@code print()} output to standard output. */
  public void debugPrint() {
    list.forEach(e -> System.out.println(e.print()));
  }

  /**
   * Normalizes separators in {@code line} and cuts it into tokens.
   *
   * @param line the raw line
   * @return the tokens obtained by splitting on tab or '-'
   */
  private String[] split(String line) {
    // Runs of the characters in this class (space, '|') become a single tab.
    line = line.replaceAll("[ | ]+", "\t");
    line = line.replaceAll("\t+", "\t");
    // Only the first 市/区/郡 gets a tab inserted after it.
    line = MUNICIPALITY_SUFFIX.matcher(line).replaceFirst("$1\t");
    // Insert a tab where a digit run or an upper-case run begins.
    line = DIGIT_BOUNDARY.matcher(line).replaceAll("$1\t$2");
    line = UPPERCASE_BOUNDARY.matcher(line).replaceAll("$1\t$2");
    return line.split("\t|-");
  }

  /**
   * Fills {@link #kaisouList} with one label per token.
   *
   * @param data the tokens
   * @param length the number of tokens to label
   */
  private void kaisouDetector(String[] data, int length) {
    kaisouList = new ArrayList<>();
    kaisouList.add(0, 階層要素.ダミーサフィックス);
    for (int i = 1; i < length; i++) {
      try {
        // Exactly one label is added per iteration, so the list stays
        // index-aligned with data.
        kaisouList.add(i, classify(data, length, i));
      } catch (IndexOutOfBoundsException e) {
        logger.error("Failed to classify token {} of {}", i, length, e);
        throw e;
      }
    }
  }

  /**
   * Chooses the label for token {@code i}. The result depends on the token's
   * position, its suffix, whether it and its successor are numeric, and which
   * labels have already been assigned to earlier tokens.
   *
   * @param data the tokens
   * @param length the number of tokens being labelled
   * @param i index of the token to classify (1 or greater)
   * @return the label for the token; {@code ダミーサフィックス} when no rule applies
   */
  private 階層要素 classify(String[] data, int length, int i) {
    String val = data[i];

    if (i == 1) {
      return 階層要素.都道府県;
    }
    if (i == 2) {
      if (val.endsWith("市")) {
        return 階層要素.市;
      }
      if (val.endsWith("区")) {
        return 階層要素.東京23区;
      }
      if (val.endsWith("郡")) {
        return 階層要素.群;
      }
    }
    if (i == 3 && val.endsWith("区")) {
      return 階層要素.区;
    }

    boolean isNumBlock = NUMERIC_BLOCK.matcher(val).matches();

    if (i >= 3 && i <= 7) {
      boolean nextIsNumBlock =
          length > i + 1 && NUMERIC_BLOCK.matcher(data[i + 1]).matches();
      // A non-numeric token directly before a number is taken as the town name,
      // but only once per line.
      if (nextIsNumBlock && !isNumBlock && !kaisouList.contains(階層要素.町または大字)) {
        return 階層要素.町または大字;
      }
      if (isNumBlock) {
        // Numeric tokens take the first of these labels not yet used.
        if (!kaisouList.contains(階層要素.丁目)) {
          return 階層要素.丁目;
        }
        if (!kaisouList.contains(階層要素.地番)) {
          return 階層要素.地番;
        }
        if (!kaisouList.contains(階層要素.支号)) {
          return 階層要素.支号;
        }
      }
    }

    if (i >= 6) {
      if (!isNumBlock) {
        return val.endsWith("方") ? 階層要素.方書き : 階層要素.建物;
      }
      // NOTE(review): this branch is currently unreachable. Every non-numeric
      // token returned just above, and an all-digit token cannot end with "棟".
      // Kept as-is to preserve existing classification; confirm intended order.
      if (val.endsWith("棟")) {
        return 階層要素.棟;
      }
      if (kaisouList.contains(階層要素.建物)) {
        return 階層要素.部屋番号;
      }
    }

    return 階層要素.ダミーサフィックス;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy