All downloads are free. The search and download functionality uses the official Maven repository.

com.github.houbb.special.chars.test.data.CombineTest Maven / Gradle / Ivy

There is a newer version: 0.0.2
Show newest version
package com.github.houbb.special.chars.test.data;

import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CharsetUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import org.junit.Ignore;
import org.junit.Test;

import java.util.*;

/**
 * De-duplication and annotation helpers for the emoticon resource files.
 *
 * <p>Legend kept from the original notes: key = Chinese, value = emoticon, eng = English.</p>
 *
 * <p>Every task reads one or more text files below {@link #BASE_DIR}, rewrites the lines in
 * memory and writes the result to a sibling file in the same directory. The directory is a
 * hard-coded absolute path; the class carries {@code @Ignore}, presumably so the tasks are not
 * executed as part of a regular test run.</p>
 *
 * @author binbin.hou
 * @since 0.0.1
 */
@Ignore
public class CombineTest {

    /** Directory (with trailing separator) that holds every input and output file used here. */
    private static final String BASE_DIR =
            "D:\\_github\\special-char\\special-char-test\\src\\main\\resources\\fhdq\\detail\\index_nav_bq\\";

    /**
     * Merges each pair of consecutive lines of {@code 可爱符号表情.txt} into one line
     * {@code firstLine,secondLine} and writes the merged lines to {@code 可爱符号表情_F.txt}.
     */
    @Test
    public void combineKeAiTest() {
        final String source = BASE_DIR + "可爱符号表情.txt";
        final String target = BASE_DIR + "可爱符号表情_F.txt";

        final List<String> lines = FileUtil.readAllLines(source);
        FileUtil.write(target, joinLinePairs(lines, false));
    }

    /**
     * Merges each pair of consecutive lines of {@code 常用简单表情符号.txt} into one line
     * {@code secondLine,firstLine} and writes the merged lines to {@code 常用简单表情符号_F.txt}.
     */
    @Test
    public void oftenSimpleTest() {
        final String source = BASE_DIR + "常用简单表情符号.txt";
        final String target = BASE_DIR + "常用简单表情符号_F.txt";

        final List<String> lines = FileUtil.readAllLines(source);
        FileUtil.write(target, joinLinePairs(lines, true));
    }

    /**
     * Rewrites every line of {@code 网络表情符号_F.txt} from {@code face description} to
     * {@code description,face}, splitting at the last blank, and writes the result to
     * {@code 网络表情符号_F2.txt}.
     */
    @Test
    public void lastBlankTest() {
        final String source = BASE_DIR + "网络表情符号_F.txt";
        final String target = BASE_DIR + "网络表情符号_F2.txt";

        final List<String> lines = FileUtil.readAllLines(source);
        FileUtil.write(target, moveDescriptionToFront(lines));
    }

    /**
     * Rewrites every non-blank line of {@code 表情说明.txt} from {@code face description} to
     * {@code description,face}, splitting at the last blank, and writes the result to
     * {@code 表情说明_F.txt}.
     */
    @Test
    public void lastBlank2Test() {
        final String source = BASE_DIR + "表情说明.txt";
        final String target = BASE_DIR + "表情说明_F.txt";

        final List<String> lines = FileUtil.readAllLines(source);
        FileUtil.write(target, moveDescriptionToFront(lines));
    }

    /**
     * Strips all Chinese characters from every line of {@code 表情_all_in_one.txt}, drops lines
     * that shrink to at most one character, then passes the remaining lines through
     * {@code CollectionUtil.distinctAndSort} and writes them to {@code 表情_all_in_one_F.txt}.
     */
    @Test
    public void removeChineseAndDistinctTest() {
        final String source = BASE_DIR + "表情_all_in_one.txt";
        final String target = BASE_DIR + "表情_all_in_one_F.txt";

        final List<String> lines = FileUtil.readAllLines(source);
        final List<String> results = new ArrayList<>();
        for (final String line : lines) {
            final String stripped = removeChinese(line).trim();
            // Skip results that consist of a single character (or nothing at all).
            if (stripped.length() <= 1) {
                continue;
            }
            results.add(stripped);
        }

        FileUtil.write(target, CollectionUtil.distinctAndSort(results));
    }

    /**
     * Prefixes the faces of {@code 表情_all_in_one_F.txt} with descriptions looked up per
     * character in {@code bq_item_desc.txt} and writes the result to
     * {@code 表情_all_in_one_F2.txt}.
     *
     * <p>A line that already contains a comma is copied unchanged. For any other line the
     * descriptions of its characters are collected, joined with {@code ":"} and placed in front
     * of the face as {@code descriptions,face}; a face without any description is copied
     * unchanged.</p>
     */
    @Test
    public void biaoqingDescTest() {
        final Map<String, String> descMap = FileUtil.readToMap(BASE_DIR + "bq_item_desc.txt", " ");
        final List<String> faces = FileUtil.readAllLines(BASE_DIR + "表情_all_in_one_F.txt");
        final String target = BASE_DIR + "表情_all_in_one_F2.txt";

        final List<String> results = new ArrayList<>(faces.size());
        for (final String face : faces) {
            if (face.contains(",")) {
                results.add(face);
                continue;
            }

            final String descFull = StringUtil.join(collectDescriptions(face, descMap), ":");
            if (StringUtil.isNotEmpty(descFull)) {
                results.add(descFull + "," + face);
            } else {
                results.add(face);
            }
        }

        FileUtil.write(target, results);
    }

    /**
     * Joins lines two at a time with a comma.
     *
     * <p>A trailing line without a partner is not part of the result; it is reported on
     * standard output so that the omission does not go unnoticed.</p>
     *
     * @param lines           the lines to pair up: (0,1), (2,3), ...
     * @param secondLineFirst {@code true} to emit {@code second,first}, {@code false} to emit
     *                        {@code first,second}
     * @return one joined line per complete pair, in input order
     */
    private static List<String> joinLinePairs(final List<String> lines, final boolean secondLineFirst) {
        final List<String> joined = new ArrayList<>(lines.size() / 2);
        for (int i = 0; i + 1 < lines.size(); i += 2) {
            final String first = lines.get(i);
            final String second = lines.get(i + 1);
            joined.add(secondLineFirst ? second + "," + first : first + "," + second);
        }

        if (lines.size() % 2 != 0) {
            System.out.println("Unpaired trailing line skipped: " + lines.get(lines.size() - 1));
        }
        return joined;
    }

    /**
     * Turns {@code face description} lines into {@code description,face} lines.
     *
     * <p>Each line is trimmed and split at its last blank. Blank lines are skipped. A line that
     * contains no blank cannot be split and is kept as it is (trimmed) instead of failing with a
     * {@link StringIndexOutOfBoundsException}.</p>
     *
     * @param lines the raw input lines
     * @return the rewritten lines, in input order
     */
    private static List<String> moveDescriptionToFront(final List<String> lines) {
        final List<String> results = new ArrayList<>(lines.size());
        for (final String rawLine : lines) {
            if (StringUtil.isEmptyTrim(rawLine)) {
                continue;
            }
            System.out.println(rawLine);

            // Trim first so that trailing whitespace is not mistaken for the separator.
            final String line = rawLine.trim();
            final int lastBlank = line.lastIndexOf(" ");
            if (lastBlank < 0) {
                results.add(line);
                continue;
            }

            final String desc = line.substring(lastBlank).trim();
            final String face = line.substring(0, lastBlank).trim();
            results.add(desc + "," + face);
        }
        return results;
    }

    /**
     * Returns {@code text} without the characters that {@code CharsetUtil.isChinese} accepts.
     *
     * @param text the text to filter
     * @return the remaining characters in their original order
     */
    private static String removeChinese(final String text) {
        final StringBuilder kept = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            final char ch = text.charAt(i);
            if (!CharsetUtil.isChinese(ch)) {
                kept.append(ch);
            }
        }
        return kept.toString();
    }

    /**
     * Collects the distinct descriptions of all characters of a face.
     *
     * <p>The face is walked by Unicode code point, so a character stored as a surrogate pair is
     * looked up as one key rather than as two halves. A map value may hold several descriptions
     * separated by {@code ":"}; each of them is collected once, in order of first occurrence.</p>
     *
     * @param face    the face whose characters are looked up
     * @param descMap character to description(s) mapping
     * @return the distinct descriptions found, possibly empty
     */
    private static Set<String> collectDescriptions(final String face, final Map<String, String> descMap) {
        final Set<String> descriptions = new LinkedHashSet<>();
        int index = 0;
        while (index < face.length()) {
            final int width = Character.charCount(face.codePointAt(index));
            final String symbol = face.substring(index, index + width);
            index += width;

            final String desc = descMap.get(symbol);
            if (StringUtil.isNotEmpty(desc)) {
                descriptions.addAll(Arrays.asList(desc.split(":")));
            }
        }
        return descriptions;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy