com.github.houbb.special.chars.test.data.CombineTest Maven / Gradle / Ivy
package com.github.houbb.special.chars.test.data;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CharsetUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import org.junit.Ignore;
import org.junit.Test;
import java.util.*;
/**
 * One-off data-preparation routines for the emoticon resource files:
 * de-duplication and annotation.
 *
 * <p>Record layout used by the generated files:</p>
 * <ul>
 *     <li>key: Chinese description</li>
 *     <li>value: emoticon</li>
 *     <li>eng: English</li>
 * </ul>
 *
 * <p>Every routine reads one text file from {@link #BASE_DIR}, transforms it line by line and
 * writes the result to a sibling file. The class is annotated with {@code @Ignore} — presumably
 * because the routines depend on files at a machine-specific absolute path and are meant to be
 * run by hand.</p>
 *
 * @author binbin.hou
 * @since 0.0.1
 */
@Ignore
public class CombineTest {

    /**
     * Directory (with trailing separator) that holds every input and output file used here.
     */
    private static final String BASE_DIR =
            "D:\\_github\\special-char\\special-char-test\\src\\main\\resources\\fhdq\\detail\\index_nav_bq\\";

    /**
     * Joins every two consecutive lines of {@code 可爱符号表情.txt} into one
     * {@code firstLine,secondLine} record and writes the records to {@code 可爱符号表情_F.txt}.
     */
    @Test
    public void combineKeAiTest() {
        final String o = path("可爱符号表情.txt");
        final String t = path("可爱符号表情_F.txt");

        List<String> lines = FileUtil.readAllLines(o);
        FileUtil.write(t, joinLinePairs(lines, false));
    }

    /**
     * Joins every two consecutive lines of {@code 常用简单表情符号.txt} into one
     * {@code secondLine,firstLine} record and writes the records to {@code 常用简单表情符号_F.txt}.
     */
    @Test
    public void oftenSimpleTest() {
        final String o = path("常用简单表情符号.txt");
        final String t = path("常用简单表情符号_F.txt");

        List<String> lines = FileUtil.readAllLines(o);
        FileUtil.write(t, joinLinePairs(lines, true));
    }

    /**
     * Rewrites each {@code face description} line of {@code 网络表情符号_F.txt} as
     * {@code description,face} and writes the result to {@code 网络表情符号_F2.txt}.
     */
    @Test
    public void lastBlankTest() {
        rewriteWithDescriptionFirst(path("网络表情符号_F.txt"), path("网络表情符号_F2.txt"));
    }

    /**
     * Rewrites each {@code face description} line of {@code 表情说明.txt} as
     * {@code description,face} and writes the result to {@code 表情说明_F.txt}.
     */
    @Test
    public void lastBlank2Test() {
        rewriteWithDescriptionFirst(path("表情说明.txt"), path("表情说明_F.txt"));
    }

    /**
     * Strips Chinese characters from every line of {@code 表情_all_in_one.txt}, drops what is left
     * when it is at most one character long, then de-duplicates and sorts the remainder and writes
     * it to {@code 表情_all_in_one_F.txt}.
     */
    @Test
    public void removeChineseAndDistinctTest() {
        final String o = path("表情_all_in_one.txt");
        final String t = path("表情_all_in_one_F.txt");

        List<String> lines = FileUtil.readAllLines(o);
        List<String> results = new ArrayList<>(lines.size());
        for (String line : lines) {
            String result = stripChinese(line);
            // Skip leftovers that are empty or a single character.
            if (result.length() <= 1) {
                continue;
            }
            results.add(result);
        }

        results = CollectionUtil.distinctAndSort(results);
        FileUtil.write(t, results);
    }

    /**
     * Adds descriptions to the emoticons of {@code 表情_all_in_one_F.txt} and writes the result to
     * {@code 表情_all_in_one_F2.txt}.
     *
     * <p>Lines that already contain a comma are copied unchanged. For every other line each
     * character is looked up in the map read from {@code bq_item_desc.txt}; the descriptions found
     * are merged and, when there is at least one, prefixed to the line as
     * {@code descriptions,face}.</p>
     */
    @Test
    public void biaoqingDescTest() {
        // Read with " " as splitter — presumably "character description" per line; verify against FileUtil.
        final Map<String, String> descMap = FileUtil.readToMap(path("bq_item_desc.txt"), " ");
        final List<String> faces = FileUtil.readAllLines(path("表情_all_in_one_F.txt"));
        final String t = path("表情_all_in_one_F2.txt");

        List<String> results = new ArrayList<>(faces.size());
        for (String line : faces) {
            if (line.contains(",")) {
                // Already in "description,face" form.
                results.add(line);
            } else {
                results.add(describeFace(line, descMap));
            }
        }

        FileUtil.write(t, results);
    }

    /**
     * Resolves a file name against {@link #BASE_DIR}.
     *
     * @param fileName plain file name, without any directory part
     * @return the absolute path of the file
     */
    private static String path(final String fileName) {
        return BASE_DIR + fileName;
    }

    /**
     * Joins consecutive line pairs (lines 0+1, 2+3, ...) into comma separated records.
     *
     * <p>When the input has an odd number of lines the last line has no partner; it is kept as a
     * record of its own (unless it is blank) instead of being dropped silently.</p>
     *
     * @param lines           the input lines
     * @param secondLineFirst {@code true} to emit {@code second,first}, {@code false} to emit
     *                        {@code first,second}
     * @return the joined records, in input order
     */
    private static List<String> joinLinePairs(final List<String> lines, final boolean secondLineFirst) {
        final int size = lines.size();
        final List<String> results = new ArrayList<>(size / 2 + 1);

        for (int i = 0; i + 1 < size; i += 2) {
            final String first = lines.get(i);
            final String second = lines.get(i + 1);
            if (secondLineFirst) {
                results.add(second + "," + first);
            } else {
                results.add(first + "," + second);
            }
        }

        if (size % 2 == 1) {
            final String unpaired = lines.get(size - 1);
            if (!unpaired.trim().isEmpty()) {
                results.add(unpaired);
            }
        }
        return results;
    }

    /**
     * Reads {@code source}, moves the trailing description of every non-blank line to the front
     * and writes the rewritten lines to {@code target}. Each processed line is echoed to standard
     * output.
     *
     * @param source path of the file to read
     * @param target path of the file to write
     */
    private static void rewriteWithDescriptionFirst(final String source, final String target) {
        final List<String> lines = FileUtil.readAllLines(source);
        final List<String> results = new ArrayList<>(lines.size());

        for (String line : lines) {
            if (StringUtil.isEmptyTrim(line)) {
                continue;
            }
            System.out.println(line);
            results.add(moveTrailingDescriptionToFront(line));
        }

        FileUtil.write(target, results);
    }

    /**
     * Turns {@code "face description"} into {@code "description,face"}, splitting at the last
     * blank of the trimmed line.
     *
     * <p>The line is trimmed first so that leading or trailing whitespace is not mistaken for the
     * separator. A line without any inner blank has no description to move and is returned
     * trimmed but otherwise unchanged.</p>
     *
     * @param line a non-null input line
     * @return the rewritten line
     */
    private static String moveTrailingDescriptionToFront(final String line) {
        final String trimmed = line.trim();
        final int lastBlank = trimmed.lastIndexOf(" ");
        if (lastBlank < 0) {
            return trimmed;
        }

        final String desc = trimmed.substring(lastBlank);
        final String face = trimmed.substring(0, lastBlank);
        return desc.trim() + "," + face.trim();
    }

    /**
     * Removes every character that {@code CharsetUtil.isChinese} reports as Chinese.
     * Digits and Latin letters are kept.
     *
     * @param line a non-null input line
     * @return the remaining characters, trimmed
     */
    private static String stripChinese(final String line) {
        final StringBuilder kept = new StringBuilder(line.length());
        for (int i = 0; i < line.length(); i++) {
            final char ch = line.charAt(i);
            if (!CharsetUtil.isChinese(ch)) {
                kept.append(ch);
            }
        }
        return kept.toString().trim();
    }

    /**
     * Builds the {@code descriptions,face} record for one emoticon.
     *
     * <p>Each character of the emoticon is looked up in {@code descMap}; a mapped value may hold
     * several descriptions separated by {@code ":"}. Descriptions are de-duplicated and joined
     * with {@code ":"} in order of first appearance, so the output does not depend on hash
     * ordering.</p>
     *
     * @param face    the emoticon line
     * @param descMap single-character string to description(s)
     * @return {@code descriptions,face}, or {@code face} unchanged when no character is described
     */
    private static String describeFace(final String face, final Map<String, String> descMap) {
        final Set<String> descriptions = new LinkedHashSet<>();
        for (int i = 0; i < face.length(); i++) {
            final String desc = descMap.get(String.valueOf(face.charAt(i)));
            if (StringUtil.isNotEmpty(desc)) {
                descriptions.addAll(Arrays.asList(desc.split(":")));
            }
        }

        final String descFull = StringUtil.join(descriptions, ":");
        if (StringUtil.isNotEmpty(descFull)) {
            return descFull + "," + face;
        }
        return face;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy