All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.luna.baidu.api.BaiduTextApi Maven / Gradle / Ivy

The newest version!
package com.luna.baidu.api;

import java.io.IOException;
import java.util.HashMap;

import com.luna.common.net.HttpUtils;
import com.luna.common.net.HttpUtilsConstant;
import com.luna.common.text.CharsetKit;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.luna.baidu.dto.text.TextSimilarResultDTO;
import com.luna.baidu.dto.text.TextSimnetResultDTO;

/**
 * @author Luna@win10
 * @date 2020/5/24 21:10
 */
public class BaiduTextApi {

    private static final Logger log = LoggerFactory.getLogger(BaiduTextApi.class);

    /**
     * 百度文本纠错
     * 
     * @param text
     * @throws IOException
     */
    public static String correction(String key, String text) {
        log.info("correction start key={}, text={}", key, text);
        String body = "{\"text\": \"" + text + "\"" + "}";
        HttpResponse httpResponse = HttpUtils.doPost(BaiduApiConstant.HOST, BaiduApiConstant.LANGUAGE_PROCESSING,
            ImmutableMap.of("Content-Type", HttpUtilsConstant.JSON),
            ImmutableMap.of("access_token", key, "charset", CharsetKit.UTF_8),
            body);
        String s = HttpUtils.checkResponseAndGetResult(httpResponse, true);
        JSONObject jsonObject = JSON.parseObject(JSON.parseObject(s).get("item").toString());
        String query = jsonObject.get("correct_query").toString();
        log.info("correction success query={}, text={}", query, text);
        return query;
    }

    /**
     * 文本相似度比较 短文本相似度接口用来判断两个文本的相似度得分。
     *
     * BOW(词包)模型=>基于bag of words的BOW模型,特点是泛化性强,效率高,比较轻量级,适合任务:输入序列的 term “确切匹配”、不关心序列的词序关系,对计算效率有很高要求;
     *
     * GRNN(循环神经网络)模型=>基于recurrent,擅长捕捉短文本“跨片段”的序列片段关系,适合任务:对语义泛化要求很高,对输入语序比较敏感的任务;
     *
     * CNN(卷积神经网络)模型=>模型语义泛化能力介于 BOW/RNN 之间,对序列输入敏感,相较于 GRNN 模型的一个显著优点是计算效率会更高些。
     * 
     * @param key
     * @param model
     * @return
     */
    public static TextSimnetResultDTO similarityText(String key, String text1, String text2, String model) {
        log.info("similarityText start key={},text1={},text2={},model={}", key, text1, text2, model);
        if (StringUtils.isEmpty(model)) {
            model = "BOW";
        }
        HashMap textParam = Maps.newHashMap();
        textParam.put("text_1", text1);
        textParam.put("text_2", text2);
        textParam.put("model", model);
        HttpResponse httpResponse = HttpUtils.doPost(BaiduApiConstant.HOST, BaiduApiConstant.TEXT_SIMILARITY,
            ImmutableMap.of("Content-Type", HttpUtilsConstant.JSON),
            ImmutableMap.of("access_token", key, "charset", CharsetKit.UTF_8),
            JSON.toJSONString(textParam));
        String s = HttpUtils.checkResponseAndGetResult(httpResponse, true);
        TextSimnetResultDTO textSimnetResultDTO = JSON.parseObject(s, TextSimnetResultDTO.class);
        log.info("similarityText success key={},textSimnetResultDTO={}", key, JSON.toJSONString(textSimnetResultDTO));
        return textSimnetResultDTO;
    }

    /**
     * 词语比较 输入两个词,得到两个词的相似度结果。
     * 
     * @param word1
     * @param word2
     * @return
     * @throws IOException
     */
    public static TextSimilarResultDTO similarityWords(String key, String word1, String word2) {
        log.info("similarityWords start key={},text1={},text2={}", key, word1, word2);
        HashMap wordParam = Maps.newHashMap();
        wordParam.put("word_1", word1);
        wordParam.put("word_2", word2);
        HttpResponse httpResponse = HttpUtils.doPost(BaiduApiConstant.HOST, BaiduApiConstant.WOEDS_SIMILARITY,
            ImmutableMap.of("Content-Type", HttpUtilsConstant.JSON),
            ImmutableMap.of("access_token", key, "charset", CharsetKit.UTF_8),
            JSON.toJSONString(wordParam));
        String s = HttpUtils.checkResponseAndGetResult(httpResponse, true);
        TextSimilarResultDTO similarResultDTO = JSON.parseObject(s, TextSimilarResultDTO.class);
        log.info("similarityWords success key={},similarResultDTO={}", key, JSON.toJSONString(similarResultDTO));
        return similarResultDTO;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy