All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.seomse.crawling.core.http.HttpUrlConnManager Maven / Gradle / Ivy

/*
 * Copyright (C) 2020 Seomse Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.seomse.crawling.core.http;

import com.seomse.commons.utils.ExceptionUtil;
import com.seomse.crawling.CrawlingServer;
import com.seomse.crawling.exception.NodeEndException;
import com.seomse.crawling.node.CrawlingNode;
import com.seomse.crawling.node.CrawlingNodeScript;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.Map;

/**
 * Coordinates the HttpUrlConnection-based script requests that the crawling server
 * dispatches to its crawling nodes.
 * <p>
 * Kept as a separate class so that the server source stays readable.
 * <p>
 * For every {@code checkUrl} the manager remembers which node served the previous
 * request and starts the next request at the following node of the server's node array.
 * A node is only used again for the same {@code checkUrl} once {@code connLimitTime}
 * milliseconds have passed since its last connection. Requests that share a
 * {@code checkUrl} are serialized on a per-{@code checkUrl} monitor.
 *
 * @author macle
 */
public class HttpUrlConnManager {

	private static final Logger logger = LoggerFactory.getLogger(HttpUrlConnManager.class);

	private final CrawlingServer server;

	/** checkUrl -> node that served the most recent successful request; guarded by {@link #lock}. */
	private final Map<String, CrawlingNode> lastNodeMap;

	/** checkUrl -> monitor that serializes the requests of that checkUrl; guarded by {@link #lock}. */
	private final Map<String, Object> lockMap;

	/** Guards every read and write of {@link #lastNodeMap} and {@link #lockMap} (both plain HashMaps). */
	private final Object lock = new Object();

	/**
	 * {@code true} while the next "no node available" condition still has to be logged.
	 * Reset to {@code false} after logging so the error is not repeated on every call,
	 * and back to {@code true} as soon as nodes are available again.
	 */
	private volatile boolean isNodeNullLog = true;

	/**
	 * Constructor.
	 * @param server CrawlingServer that supplies the node array and is notified of ended nodes
	 */
	public HttpUrlConnManager(CrawlingServer server) {
		this.server = server;
		lastNodeMap = new HashMap<>();
		lockMap = new HashMap<>();
	}

	/**
	 * Fetches the script of {@code url} through one of the crawling nodes.
	 * @param checkUrl String key used for node rotation and connection-interval bookkeeping
	 * @param connLimitTime long minimum interval in milliseconds between two connections of one node for {@code checkUrl}
	 * @param url String url to request
	 * @param optionData JSONObject options handed to the node unchanged
	 * @return String the script, or {@code null} when no node is available
	 */
	public String getHttpUrlScript(String checkUrl, long connLimitTime, String url, JSONObject optionData) {

		CrawlingNodeScript crawlingNodeScript = getNodeScript(checkUrl, connLimitTime, url, optionData);
		if(crawlingNodeScript == null){
			return null;
		}

		return crawlingNodeScript.getScript();
	}

	/**
	 * Fetches the script of {@code url} and returns it together with the node that produced it.
	 * <p>
	 * When the selected node signals {@link NodeEndException} it is reported to the server via
	 * {@code endNode} and the request is retried with a freshly read node array.
	 * @param checkUrl String key used for node rotation and connection-interval bookkeeping
	 * @param connLimitTime long minimum interval in milliseconds between two connections of one node for {@code checkUrl}
	 * @param url String url to request
	 * @param optionData JSONObject options handed to the node unchanged
	 * @return CrawlingNodeScript node and script, or {@code null} when the server has no node
	 */
	public CrawlingNodeScript getNodeScript(String checkUrl, long connLimitTime, String url, JSONObject optionData) {

		// Retry in a loop (not by recursion) so a long run of ended nodes cannot grow the call stack.
		while(true) {
			CrawlingNode [] nodeArray = server.getNodeArray();
			if(nodeArray.length == 0) {
				if(isNodeNullLog) {
					logger.error("node null...");
					isNodeNullLog = false;
				}
				return null;
			}

			logger.debug("node length: {}", nodeArray.length);

			isNodeNullLog = true;

			CrawlingNodeScript crawlingNodeScript = requestNextNode(nodeArray, checkUrl, connLimitTime, url, optionData);
			if(crawlingNodeScript != null) {
				return crawlingNodeScript;
			}
			// null means the chosen node had ended; it was reported to the server, so try again.
		}
	}

	/**
	 * Performs one request attempt for {@code checkUrl} while holding that checkUrl's monitor.
	 * @return the node and its script, or {@code null} when the chosen node threw NodeEndException
	 */
	private CrawlingNodeScript requestNextNode(CrawlingNode [] nodeArray, String checkUrl, long connLimitTime, String url, JSONObject optionData) {

		Object lockObj = getUrlLock(checkUrl);

		//noinspection SynchronizationOnLocalVariableOrMethodParameter
		synchronized (lockObj) {

			CrawlingNode node = selectNode(nodeArray, checkUrl, connLimitTime);

			try {
				String script = node.getHttpUrlScript(url, optionData);
				node.updateLastConnectTime(checkUrl);

				CrawlingNodeScript crawlingNodeScript = new CrawlingNodeScript(node, script);

				// lock is always taken innermost and only briefly, so it cannot deadlock with lockObj
				synchronized (lock) {
					lastNodeMap.put(checkUrl, node);
				}
				return crawlingNodeScript;
			}catch(NodeEndException e) {
				logger.debug("node end. other node request");
				server.endNode(node);
				return null;
			}
		}
	}

	/**
	 * Returns the monitor object of {@code checkUrl}, creating it on first use.
	 */
	private Object getUrlLock(String checkUrl) {
		// The lookup happens inside the lock too: HashMap must not be read while another thread writes it.
		synchronized (lock) {
			Object lockObj = lockMap.get(checkUrl);
			if(lockObj == null) {
				lockObj = new Object();
				lockMap.put(checkUrl, lockObj);
			}
			return lockObj;
		}
	}

	/**
	 * Chooses the node for the next request of {@code checkUrl}.
	 * <p>
	 * Starts at the node after the one used last (index 0 when there is none, it has ended,
	 * or the index runs past the array). If that node connected less than {@code connLimitTime}
	 * milliseconds ago, the remaining nodes are checked in order and the first one that is
	 * outside the interval is returned. If every node is still inside the interval, the calling
	 * thread sleeps for the remainder of the starting node's interval and that node is returned.
	 */
	private CrawlingNode selectNode(CrawlingNode [] nodeArray, String checkUrl, long connLimitTime) {

		CrawlingNode lastNode;
		synchronized (lock) {
			lastNode = lastNodeMap.get(checkUrl);
		}

		int nextSeq;
		if(lastNode == null || lastNode.isEnd()) {
			nextSeq = 0;
		}else {
			nextSeq = lastNode.getSeq() + 1;

			if(nextSeq >= nodeArray.length) {
				nextSeq = 0;
			}
		}

		CrawlingNode node = nodeArray[nextSeq];

		Long time = node.getLastConnectTime(checkUrl);
		if(time == null) {
			// no recorded connection of this node for checkUrl
			return node;
		}

		long gap = System.currentTimeMillis() - time;
		if(gap >= connLimitTime) {
			return node;
		}

		// The preferred node is still inside its interval: walk once around the array.
		int saveSeq = nextSeq;
		while(true) {
			nextSeq = nextSeq + 1;
			if(nextSeq >= nodeArray.length) {
				nextSeq = 0;
			}

			if(saveSeq == nextSeq) {
				// back at the starting node: nothing else is available
				break;
			}

			Long checkTime = nodeArray[nextSeq].getLastConnectTime(checkUrl);
			if(checkTime == null || System.currentTimeMillis() - checkTime >= connLimitTime) {
				// stop at the first eligible node so the rotation order is kept
				return nodeArray[nextSeq];
			}
		}

		// Every node is inside its interval: wait out the rest of the preferred node's interval.
		// gap < connLimitTime here, so the sleep time is always positive.
		try {
			Thread.sleep(connLimitTime - gap);
		}catch(InterruptedException e) {
			logger.error(ExceptionUtil.getStackTrace(e));
			// keep the interrupt visible to the caller instead of swallowing it
			Thread.currentThread().interrupt();
		}

		return node;
	}


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy