All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.seomse.crawling.core.http.HttpUrl Maven / Gradle / Ivy

/*
 * Copyright (C) 2020 Seomse Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.seomse.crawling.core.http;

import com.seomse.commons.utils.ExceptionUtil;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.*;
import java.io.*;
import java.net.*;
import java.util.Iterator;

/**
 * Static helpers that fetch text ("script") or files through {@link HttpURLConnection}.
 *
 * <p>SECURITY NOTE: every HTTPS connection created by this class installs an all-trusting
 * trust manager as the JVM-wide default SSL socket factory and disables host-name
 * verification on the connection. That is convenient for crawling but removes protection
 * against man-in-the-middle attacks; do not reuse this class for sensitive traffic.
 *
 * @author macle
 */
public class HttpUrl {

	private final static Logger logger = LoggerFactory.getLogger(HttpUrl.class);

	/** Maximum number of 301/302 responses followed by {@link #getScript(String, JSONObject)}. */
	private static final int MAX_REDIRECT_COUNT = 3;

	/** Default for both the connect and the read timeout, in milliseconds. */
	private static final int DEFAULT_TIMEOUT_MILLIS = 30000;

	/** Character set used when the options do not name one. */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/** Host-name verifier that accepts every host (see the security note on the class). */
	private final static HostnameVerifier DO_NOT_VERIFY = (arg0, arg1) -> true;

	/**
	 * Fetches the response body of {@code url} as text.
	 *
	 * <p>Because this method is meant for remote communication it never throws: failures are
	 * reported through the return value as an {@code HttpError} message followed by the stack
	 * trace wrapped in braces.
	 *
	 * <p>Up to three 301/302 redirects are followed; each hop is opened with the same
	 * {@code optionData}. A response that is not 200 after that yields an empty string.
	 *
	 * <p>Options as listed by the original documentation (the actual key strings are the
	 * {@code HttpOptionDataKey} constants):
	 * <ul>
	 *   <li>requestMethod (GET, POST, HEAD, OPTIONS, PUT, DELETE, TRACE)</li>
	 *   <li>requestProperty (including Cookie)</li>
	 *   <li>charSet (default UTF-8)</li>
	 *   <li>outputStreamValue</li>
	 *   <li>readTimeout (default 30000)</li>
	 *   <li>connectTimeout (default 30000)</li>
	 * </ul>
	 *
	 * @param url url to request
	 * @param optionData request options, may be {@code null}
	 * @return the response text, an empty string for a non-200 response, or an error message
	 */
	public static String getScript(String url, JSONObject optionData) {
		try {
			HttpURLConnection conn = newHttpURLConnection(url, optionData);
			try {
				for (int hop = 0; hop < MAX_REDIRECT_COUNT; hop++) {
					int status = conn.getResponseCode();
					boolean redirected = status == HttpURLConnection.HTTP_MOVED_TEMP
							|| status == HttpURLConnection.HTTP_MOVED_PERM;
					if (!redirected) {
						break;
					}

					String location = conn.getHeaderField("Location");
					if (location == null || location.isEmpty()) {
						// Nothing to follow; the redirect response itself is handled below.
						logger.error("redirect response without Location header: " + conn.getURL());
						break;
					}

					// Location may be relative, so resolve it against the URL that answered.
					URL target = new URL(conn.getURL(), location);
					HttpURLConnection next = newHttpURLConnection(target.toString(), optionData);
					// Release the superseded connection only once its replacement exists.
					conn.disconnect();
					conn = next;
				}
			} catch (IOException e) {
				// Kept as log-and-continue: the read below either reports the same failure
				// through the outer handlers or returns an empty string for the last response.
				logger.error(ExceptionUtil.getStackTrace(e));
			}

			return getScript(conn, resolveCharSet(optionData));
		}catch(SocketTimeoutException e){
			return HttpError.SOCKET_TIME_OUT.message() +"{" + ExceptionUtil.getStackTrace(e) + "}";
		}catch(ConnectException e){
			return HttpError.CONNECT_FAIL.message() +"{" + ExceptionUtil.getStackTrace(e) + "}";
		}catch(IOException e){
			return HttpError.IO.message() +"{" + ExceptionUtil.getStackTrace(e) + "}";
		}catch(Exception e){
			return HttpError.ERROR.message() +"{" + ExceptionUtil.getStackTrace(e) + "}";
		}
	}

	/**
	 * Reads the response body of {@code conn} as text.
	 *
	 * <p>The body is read line by line and the lines are re-joined with {@code '\n'}, without
	 * a trailing line break. Nothing is read unless the response code is 200.
	 *
	 * @param conn connection to read from, may be {@code null}
	 * @param charSet character set name; {@code null} uses the platform default
	 * @return the response text, or an empty string when {@code conn} is {@code null} or the
	 *         response code is not 200
	 * @throws IOException if the request or the read fails
	 */
	public static String getScript(HttpURLConnection conn, String charSet) throws IOException {
		StringBuilder message = new StringBuilder();
		if (conn == null || conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
			return message.toString();
		}

		InputStream in = null;
		BufferedReader br = null;
		try {
			in = conn.getInputStream();
			InputStreamReader reader = charSet == null
					? new InputStreamReader(in)
					: new InputStreamReader(in, charSet);
			br = new BufferedReader(reader);

			String line;
			while ((line = br.readLine()) != null) {
				message.append(line).append('\n');
			}

			if (message.length() > 0) {
				// drop the line break appended after the last line
				message.setLength(message.length() - 1);
			}
		} finally {
			closeQuietly(br);
			// Also covers the case where the reader could not be created (unknown charset).
			closeQuietly(in);
		}

		return message.toString();
	}

	/**
	 * Downloads {@code urlAddress} and stores the response body at {@code filePath}.
	 *
	 * <p>Missing parent directories are created and an existing file is replaced. The request
	 * is a plain GET with default timeouts; redirects are not followed.
	 *
	 * @param urlAddress url to download
	 * @param filePath path the body is saved to
	 * @return the written file, or {@code null} when the response code is not 200
	 * @throws IOException if the request, the read or the write fails
	 */
	public static File getFile(String urlAddress, String filePath) throws IOException {
		HttpURLConnection conn = newHttpURLConnection(urlAddress, null);
		try {
			if (conn == null || conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
				return null;
			}

			File file = new File(filePath);
			// A bare file name has no parent; there is nothing to create in that case.
			File parent = file.getParentFile();
			if (parent != null) {
				//noinspection ResultOfMethodCallIgnored
				parent.mkdirs();
			}
			if (file.exists()) {
				//noinspection ResultOfMethodCallIgnored
				file.delete();
			}
			//noinspection ResultOfMethodCallIgnored
			file.createNewFile();

			try (InputStream in = conn.getInputStream();
				 FileOutputStream fos = new FileOutputStream(file)) {
				byte[] buffer = new byte[8192];
				int read;
				while ((read = in.read(buffer)) != -1) {
					fos.write(buffer, 0, read);
				}
			}
			return file;
		} finally {
			if (conn != null) {
				conn.disconnect();
			}
		}
	}

	/**
	 * Creates and configures an {@link HttpURLConnection} for {@code urlAddr}.
	 *
	 * <p>Caching and automatic redirect following are switched off. With {@code optionData}
	 * equal to {@code null} the connection is a GET with default timeouts. Otherwise request
	 * properties, request method, timeouts and the request body are taken from the options;
	 * when a body is configured it is written here, before the method returns.
	 *
	 * @param urlAddr url to connect to
	 * @param optionData request options, may be {@code null}
	 * @return the configured connection
	 * @throws IOException if the url is malformed, the connection cannot be opened or the
	 *         request body cannot be written
	 */
	public static HttpURLConnection newHttpURLConnection(String urlAddr, JSONObject optionData) throws IOException {
		URL url = new URL(urlAddr);

		HttpURLConnection conn;
		if ("https".equalsIgnoreCase(url.getProtocol())) {
			// Must run before openConnection() so the new connection picks up the factory.
			trustAllHosts();
			HttpsURLConnection https = (HttpsURLConnection) url.openConnection();
			https.setHostnameVerifier(DO_NOT_VERIFY);
			conn = https;
		} else {
			conn = (HttpURLConnection) url.openConnection();
		}

		conn.setUseCaches(false);
		conn.setDoInput(true);
		conn.setDoOutput(true);
		conn.setInstanceFollowRedirects(false);

		if (optionData == null) {
			conn.setConnectTimeout(DEFAULT_TIMEOUT_MILLIS);
			// Without this a stalled server would block the caller forever.
			conn.setReadTimeout(DEFAULT_TIMEOUT_MILLIS);
			conn.setRequestMethod("GET");
			return conn;
		}

		if (!optionData.isNull(HttpOptionDataKey.REQUEST_PROPERTY)) {
			JSONObject property = optionData.getJSONObject(HttpOptionDataKey.REQUEST_PROPERTY);

			// Wildcard plus cast compiles whether keys() returns a raw or a typed iterator.
			Iterator<?> keys = property.keys();
			while (keys.hasNext()) {
				String key = (String) keys.next();
				conn.setRequestProperty(key, property.getString(key));
			}
		}

		if (!optionData.isNull(HttpOptionDataKey.REQUEST_METHOD)) {
			conn.setRequestMethod(optionData.getString(HttpOptionDataKey.REQUEST_METHOD));
		} else {
			conn.setRequestMethod("GET");
		}

		conn.setReadTimeout(intOption(optionData, HttpOptionDataKey.READ_TIME_OUT, DEFAULT_TIMEOUT_MILLIS));
		conn.setConnectTimeout(intOption(optionData, HttpOptionDataKey.CONNECT_TIME_OUT, DEFAULT_TIMEOUT_MILLIS));

		if (!optionData.isNull(HttpOptionDataKey.OUTPUT_STREAM_WRITE)) {
			String outputStreamValue = optionData.getString(HttpOptionDataKey.OUTPUT_STREAM_WRITE);
			byte[] contents = outputStreamValue.getBytes(resolveCharSet(optionData));
			try (OutputStream out = conn.getOutputStream()) {
				out.write(contents);
				out.flush();
			}
		}

		return conn;
	}

	/**
	 * Returns the character set named in the options, or UTF-8 when the options are
	 * {@code null}, do not name one, or the value cannot be read as a string.
	 */
	private static String resolveCharSet(JSONObject optionData) {
		if (optionData == null || optionData.isNull(HttpOptionDataKey.CHARACTER_SET)) {
			return DEFAULT_CHARSET;
		}
		try {
			return optionData.getString(HttpOptionDataKey.CHARACTER_SET);
		} catch (JSONException e) {
			logger.error(ExceptionUtil.getStackTrace(e));
			return DEFAULT_CHARSET;
		}
	}

	/**
	 * Returns the int stored under {@code key}, or {@code defaultValue} when the key is
	 * absent or its value cannot be read as an int.
	 */
	private static int intOption(JSONObject optionData, String key, int defaultValue) {
		if (optionData.isNull(key)) {
			return defaultValue;
		}
		try {
			return optionData.getInt(key);
		} catch (JSONException e) {
			logger.error(ExceptionUtil.getStackTrace(e));
			return defaultValue;
		}
	}

	/** Closes {@code closeable} if it is not {@code null}, ignoring any failure. */
	private static void closeQuietly(Closeable closeable) {
		if (closeable == null) {
			return;
		}
		try {
			closeable.close();
		} catch (Exception ignored) {
			// best-effort cleanup; a failed close must not mask the result or the real error
		}
	}

	/**
	 * Installs a trust manager that accepts every certificate chain as the JVM-wide default
	 * SSL socket factory for {@link HttpsURLConnection}.
	 *
	 * <p>NOTE(review): this affects every HTTPS connection in the process, not only the ones
	 * created by this class. It is kept as-is because other code may rely on it, but it
	 * should be reviewed before this class is used outside of crawling.
	 */
	private static void trustAllHosts() {
		// Create a trust manager that does not validate certificate chains
		TrustManager[] trustAllCerts = new TrustManager[] { new X509TrustManager() {
			@Override
			public java.security.cert.X509Certificate[] getAcceptedIssuers() {
				return new java.security.cert.X509Certificate[] {};
			}

			@Override
			public void checkClientTrusted(
					java.security.cert.X509Certificate[] chain,
					String authType) {
				// accept everything
			}

			@Override
			public void checkServerTrusted(
					java.security.cert.X509Certificate[] chain,
					String authType) {
				// accept everything
			}
		} };

		// Install the all-trusting trust manager
		try {
			SSLContext sc = SSLContext.getInstance("TLS");
			sc.init(null, trustAllCerts, new java.security.SecureRandom());
			HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
		} catch (Exception e) {
			// Best effort: the connection is still attempted with the current default factory.
			logger.error(ExceptionUtil.getStackTrace(e));
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy