/*
* Copyright (C) 2020 Seomse Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.seomse.crawling;

import com.seomse.api.server.ApiRequestConnectHandler;
import com.seomse.api.server.ApiRequestServer;
import com.seomse.commons.callback.ObjCallback;
import com.seomse.commons.handler.ExceptionHandler;
import com.seomse.crawling.core.http.HttpUrlConnManager;
import com.seomse.crawling.node.CrawlingLocalNode;
import com.seomse.crawling.node.CrawlingNode;
import com.seomse.crawling.node.CrawlingNodeScript;
import com.seomse.crawling.node.CrawlingProxyNode;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.net.Socket;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
* CrawlingServer
* @author macle
*/
public class CrawlingServer {
    private static final Logger logger = LoggerFactory.getLogger(CrawlingServer.class);

    private static final CrawlingNode[] EMPTY_NODE_ARRAY = new CrawlingNode[0];

    private final ApiRequestServer requestServer;

    // list for when ordering information needs to be kept
    // in-memory bookkeeping only, so it is not read directly; the node array is what is actually used
    private final List<CrawlingNode> nodeList = new LinkedList<>();

    private CrawlingNode[] nodeArray = EMPTY_NODE_ARRAY;
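    // guards every structural update to nodeList and the nodeArray snapshot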
    private final Object lock = new Object();

    private final ObjCallback nodeEndCallback;

    private final HttpUrlConnManager httpUrlConnManager;
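    // proxy nodes keyed by "hostAddress,hostName"; Hashtable keeps access synchronized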
    private final Map<String, CrawlingProxyNode> proxyNodeMap;

    /**
     * Constructor
     * @param port server port to listen on
     */
    public CrawlingServer(int port) {
        proxyNodeMap = new Hashtable<>();
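
        // when a node reports that its connection has ended, remove it from the server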
        nodeEndCallback = arg0 -> {
            CrawlingNode crawlingNode = (CrawlingNode) arg0;
            endNode(crawlingNode);
        };

        ApiRequestConnectHandler connectHandler = request -> {
            Socket socket = request.getSocket();
            InetAddress inetAddress = socket.getInetAddress();
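
            // key proxy nodes by "hostAddress,hostName" so repeated connections
            // from the same host are funneled into one CrawlingProxyNode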
            String nodeKey = inetAddress.getHostAddress() + "," + inetAddress.getHostName();
            CrawlingProxyNode crawlingProxyNode = proxyNodeMap.get(nodeKey);
            synchronized (lock) {
                boolean isNew = false;
                if (crawlingProxyNode == null) {
                    crawlingProxyNode = new CrawlingProxyNode(nodeKey);
                    proxyNodeMap.put(nodeKey, crawlingProxyNode);
                    crawlingProxyNode.setExceptionHandler(exceptionHandler);
                    ObjCallback endCallback = o -> endNode((CrawlingProxyNode) o);
                    crawlingProxyNode.setEndCallback(endCallback);
                    isNew = true;
                }
                crawlingProxyNode.addRequest(request);
                if (isNew) {
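                    // rebuild the snapshot array while holding the lock; readers
                    // use nodeArray directly, copy-on-write style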
                    nodeList.add(crawlingProxyNode);
                    CrawlingNode[] array = nodeList.toArray(new CrawlingNode[0]);
                    for (int i = 0; i < array.length; i++) {
                        array[i].setSeq(i);
                    }
                    nodeArray = array;
                    logger.debug("new proxy node connect: " + nodeKey + ", node length: " + nodeArray.length);
                }
            }
        };
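
        // hand the port and connect handler to the request server, which accepts
        // incoming node connections and routes each request to the handler above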
        requestServer = new ApiRequestServer(port, connectHandler);
        httpUrlConnManager = new HttpUrlConnManager(this);
    }

    private ExceptionHandler exceptionHandler;

    /**
     * Set the exception handler
     * @param exceptionHandler handler attached to each newly registered node
     */
    public void setExceptionHandler(ExceptionHandler exceptionHandler) {
        this.exceptionHandler = exceptionHandler;
    }

    /**
     * End a node and remove it from the server
     * @param crawlingNode node whose connection has ended
     */
    public void endNode(CrawlingNode crawlingNode) {
        synchronized (lock) {
            if (nodeList.remove(crawlingNode)) {
                if (nodeList.size() == 0) {
                    nodeArray = EMPTY_NODE_ARRAY;
                } else {
                    CrawlingNode[] nodeArray = nodeList.toArray(new CrawlingNode[0]);
                    // renumber the remaining nodes, then publish the new snapshot
                    for (int i = 0; i < nodeArray.length; i++) {
                        nodeArray[i].setSeq(i);
                    }
                    this.nodeArray = nodeArray;
                }
            }
        }
    }
}
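
/*
 * Minimal usage sketch, assuming only what this listing shows (the constructor
 * and setExceptionHandler). The port value is an arbitrary example, and
 * starting the underlying ApiRequestServer is presumed to happen in methods
 * cut off from this listing.
 */
class CrawlingServerExample {
    public static void main(String[] args) {
        // accept crawling node connections on an example port (assumed value)
        CrawlingServer server = new CrawlingServer(33333);
        // an ExceptionHandler from com.seomse.commons.handler can be attached;
        // the server passes it to every proxy node it registers
        server.setExceptionHandler(null); // replace null with a real handler
    }
}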