All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ecfront.kwe.KeyWordExtract Maven / Gradle / Ivy

There is a newer version: 1.6
Show newest version
package com.ecfront.kwe;

import javax.script.*;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.nio.file.Files;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

public class KeyWordExtract {

    private static Map> RULES = new HashMap<>();
    private static final String LOCAL_RULE_FILE = "kwe-rules.txt";
    private static final ScriptEngineManager SCRIPT_ENGINE_MANAGER = new ScriptEngineManager();
    private static Invocable invocable;

    static {
        try {
            loadRules(Helper.readAllByClassPath(LOCAL_RULE_FILE));
        } catch (IOException e) {
            throw new RuntimeException("[KWE]Load local rules error", e);
        }
    }

    public static Result extract(String url) {
        try {
            return extract(new URL(url));
        } catch (Exception e) {
            throw new RuntimeException("[KWE]Extract url [" + url + "] error", e);
        }
    }

    public static Result extract(URL url) {
        if (!RULES.containsKey(url.getHost())) {
            return null;
        }
        Set parsers = RULES.get(url.getHost());
        for (Parser parser : parsers) {
            Optional matched = parser.parse(url.getPath(), url.getQuery());
            if (matched.isPresent()) {
                return matched.get();
            }
        }
        return null;
    }

    public static long loadOnlineRules(String ruleUrl) throws IOException {
        return loadRules(Helper.httpGet(ruleUrl));
    }

    private static long loadRules(List rules) {
        AtomicLong counter = new AtomicLong();
        StringBuffer sb = new StringBuffer();
        rules.forEach(rule -> {
            String[] items = rule.split("\\|");
            if (items.length == 6 || items.length == 3) {
                String host = items[1];
                if (!RULES.containsKey(host)) {
                    RULES.put(host, new HashSet<>());
                }
                Parser parser = new Parser(items);
                RULES.get(host).add(parser);
                counter.getAndIncrement();
                if (parser.jsStr != null) {
                    sb.append(parser.jsStr + "\r\n");
                }
            }
        });
        try {
            Compilable jsEngine = (Compilable) SCRIPT_ENGINE_MANAGER.getEngineByName("nashorn");
            CompiledScript script = jsEngine.compile(sb.toString());
            script.eval();
            invocable = (Invocable) script.getEngine();
        } catch (ScriptException e) {
            throw new RuntimeException("[KWE]Init JS Function error", e);
        }
        return counter.get();
    }

    private static class Parser {

        private String name;
        private boolean wdInQuery;
        private int pathIndex;
        private String queryKey;
        private String codec;
        private String enc;
        private String jsFun;
        private String jsStr;

        public String getJsStr() {
            return jsStr;
        }

        private Parser(String[] items) {
            name = items[0];
            if (items.length == 6) {
                wdInQuery = items[2].equalsIgnoreCase("query");
                if (wdInQuery) {
                    queryKey = items[3];
                } else {
                    pathIndex = Integer.valueOf(items[3]);
                }
                codec = items[4];
                enc = items[5];
            } else {
                jsFun = items[1].replaceAll("\\.", "_") + "_" + Math.abs(items[2].hashCode());
                jsStr = "function " + jsFun + "(uri){\r\n" +
                        "var result = '';\r\n" +
                        items[2] + ";\r\n" +
                        "return result;\r\n" +
                        "}\r\n";
            }
        }

        private Optional parse(String path, String query) {
            if (jsFun == null) {
                if (wdInQuery) {
                    if (query == null || query.equals("")) {
                        return Optional.empty();
                    }
                    String[] queryItems = query.split("&");
                    for (String queryItem : queryItems) {
                        if (queryItem.startsWith(queryKey + '=')) {
                            String keyVal = parse(queryItem.substring(queryKey.length() + 1));
                            return Optional.of(new Result(name, keyVal));
                        }
                    }
                } else {
                    String[] pathItems = path.split("/");
                    if (pathItems.length > pathIndex) {
                        String keyVal = parse(pathItems[pathIndex + 1]);
                        return Optional.of(new Result(name, keyVal));
                    }
                }
                return Optional.empty();
            } else {
                try {
                    String keyVal = (String) invocable.invokeFunction(jsFun, path + query);
                    return Optional.of(new Result(name, keyVal));
                } catch (ScriptException | NoSuchMethodException e) {
                    throw new RuntimeException("[KWE]Execute JS Function [" + jsFun + "] error", e);
                }
            }
        }

        private String parse(String encodeValue) {
            try {
                switch (codec.toLowerCase()) {
                    case "decodeuri":
                        return URLDecoder.decode(encodeValue, enc);
                    default:
                        throw new RuntimeException("Decoder[" + codec + "] NOT Exist.");
                }
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException("[KWE]Parse decode [" + codec + "] error", e);
            }
        }

    }

    public static class Result {

        public String name;
        public String value;

        public Result(String name, String value) {
            this.name = name;
            this.value = value;
        }
    }

    private static class Helper {

        private static List readAllByClassPath(String classpath) throws IOException {
            File file = new File(KeyWordExtract.class.getResource("/").getPath() + classpath);
            if (file.exists()) {
                return Files.readAllLines(file.toPath());
            }
            InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream(classpath);
            BufferedReader buffer = new BufferedReader(new InputStreamReader(in));
            return buffer.lines().collect(Collectors.toList());
        }

        private static List httpGet(String url) throws IOException {
            URL getUrl = new URL(url);
            URLConnection connection = getUrl.openConnection();
            connection.setRequestProperty("accept", "*/*");
            connection.setRequestProperty("connection", "Keep-Alive");
            connection.setRequestProperty("user-agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
            connection.connect();
            BufferedReader buffer = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            return buffer.lines().collect(Collectors.toList());
        }

    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy